For full transparency and reproducibility, this Rmd file shows every enrichment calculation that was performed, together with its results. It is provided as a Supplemental File to Truong et al. (in preparation).

Data Import and Environment

# NOTE: the former `rm(list = ls())` call was dropped. It only deletes objects
# from the global environment (attached packages and options are left as they
# were), so it does not provide a clean session. Run/knit this file in a fresh
# R session instead.
library(data.table)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggrepel)
library(ggvenn)
library(cowplot)
library(viridis)
library(RColorBrewer)
library(colorspace)
library(scales)
library(ggpubr)
library(openxlsx)
library(readxl)

# restore the saved objects; `verbose = TRUE` prints the name of every object
# that is loaded (see the listing below)
load('./data/invitrodb_v3_5_thyroid_data.RData', verbose = TRUE)
## Loading objects:
##   ace
##   ace.thyroid
##   cytotox
##   gene
##   mc5
##   mc5.hitc1
##   sc1
##   sc2
# create a `not in` operator
`%notin%` <- Negate(`%in%`)

Overview of Classes

meta data

# DIO/IYD assays: endpoint id (aeid) and endpoint name (aenm) of every assay
# whose thyroid-related target is annotated as 'Deiodinases'
deiod.meta <- ace.thyroid[thyroid_related_target == 'Deiodinases', 
                          .(aeid, aenm = assay_component_endpoint_name)]

aenm.abbrevs <- c('DIO1', 'DIO2', 'DIO3', 'IYD')

# ignore amphibian assays: keep endpoints whose name contains "hDIO"/"hIYD".
# The pattern is evaluated once so that ids and names come from the same rows.
is.h.deiod <- grepl("\\w+h(DIO|IYD)\\w+", deiod.meta$aenm)
h.deiod.aeids <- deiod.meta[is.h.deiod, aeid]
h.deiod.aenm <- deiod.meta[is.h.deiod, aenm]

# the abbreviations are attached by position, so the number of matched
# endpoints must equal the number of abbreviations (otherwise names would be
# silently padded with NA)
stopifnot(length(h.deiod.aeids) == length(aenm.abbrevs))

# NOTE(review): positional naming assumes the rows come back in
# DIO1, DIO2, DIO3, IYD order -- confirm against `deiod.meta`
names(h.deiod.aeids) <- aenm.abbrevs
names(h.deiod.aenm) <- aenm.abbrevs

Hitcall Matrix

# get hitcall data from sc/mc for DIO/IYD assays 
# (rows without a DTXSID are dropped)
long.mc5 <- mc5[!is.na(dsstox_substance_id) & aeid %in% h.deiod.aeids, 
                .(dsstox_substance_id, aenm, hitc)]
long.sc2 <- sc2[!is.na(dsstox_substance_id) & aeid %in% h.deiod.aeids, 
                .(dsstox_substance_id, aenm, hitc)]

# combine mc/sc data: full outer join, so chemical x endpoint pairs that were
# tested in only one of the two data sets are kept (with NA for the other)
# NOTE(review): assumes at most one row per chemical x endpoint in each input;
# duplicates would multiply rows here and make pivot_wider() below return
# list-columns -- confirm
long_all <- merge.data.table(long.sc2, long.mc5, 
                             by = c("dsstox_substance_id", "aenm"), 
                             all = TRUE)
setnames(long_all, old = c("hitc.x", "hitc.y"), 
         new = c("sc.hitc", "mc.hitc"))

# if tested in both, take hitc from mc; otherwise, take hitc from sc 
# ifelse() is already vectorised, so the whole column is computed in one call
# (no rowwise() pass, and no rowwise grouping left on the result)
long_hitc_final <- long_all %>% 
  as_tibble() %>%
  mutate(hitc = ifelse(!is.na(mc.hitc), mc.hitc, sc.hitc))

# form a hitcall matrix of all 2007 tested chems 
# (one row per chemical, one column per endpoint; NA = not tested)
M.hitc <- long_hitc_final %>% 
  select(dsstox_substance_id, aenm, hitc) %>% 
  pivot_wider(names_from = aenm, values_from = hitc) %>% 
  as.data.frame()

# chemical names: look them up in sc2 first, then fall back to mc5 for any
# chemical that has no sc2 record
M.hitc$chnm <- sc2$chnm[match(M.hitc$dsstox_substance_id, sc2$dsstox_substance_id)]
M.hitc$chnm <- ifelse(
  is.na(M.hitc$chnm),
  mc5$chnm[match(M.hitc$dsstox_substance_id, mc5$dsstox_substance_id)],
  M.hitc$chnm
)
M.hitc <- M.hitc[, c("dsstox_substance_id", "chnm", h.deiod.aenm)]

# use the DTXSID as row name and drop it as a column (it is the first column
# after the reordering above)
row.names(M.hitc) <- M.hitc$dsstox_substance_id
M.hitc <- M.hitc[, -1]

Hitcall Distribution

# load updated classifications from Richard - 9/5/23
new.classified <- as.data.table(
  read_excel(file.path("./data", "chem_classifications.xlsx"))
)

# move subset of QACs from "Expert" to "ClassyFire" group
new.classified[
  chosen_class == "Quaternary ammonium salts" & class_type == "Expert",
  class_type := "ClassyFire"
]

# subset down to DIO/IYD test chems 
classified_dt <- new.classified[dtxsid %in% rownames(M.hitc)]
setnames(classified_dt, old = "dtxsid", new = "dsstox_substance_id")

# match chemical order with M.hitc for 1-1 correspondence 
classified_dt <- classified_dt[match(rownames(M.hitc), dsstox_substance_id)]
identical(rownames(M.hitc), classified_dt$dsstox_substance_id)
## [1] TRUE
# number of chemicals per class, largest classes first within each class type
counts <- classified_dt[, .N, keyby = .(class_type, chosen_class)]
counts <- counts[order(-N), .SD, keyby = .(class_type)]

# long_hitc_final is modified/merged as a data.table from here on
setDT(long_hitc_final)

# add selectivity as a column to long_hitc_final for better interpretation of hits 
mc.deiod <- mc5[
  !is.na(dsstox_substance_id) & aeid %in% h.deiod.aeids,
  .(chnm, aenm, modl_acc, modl_ga),
  by = .(aeid, dsstox_substance_id)
]
# rows without a modl_acc get the value 5, the same cap applied by pmin() below
mc.deiod[is.na(modl_acc), modl_acc := 5]
mc.deiod[, cytomed := cytotox$cytotox_median_log[
  match(dsstox_substance_id, cytotox$dsstox_substance_id)
]]

# positive controls for DIO2/3/IYD are missing cytotox points 
# (print them, then impute 3 for the missing values)
mc.deiod[is.na(cytomed)]
mc.deiod[is.na(cytomed), cytomed := 3]
# selectivity = cytotoxicity median minus the (capped) modl_acc
mc.deiod[, selectivity := cytomed - pmin(modl_acc, 5)]

The missing cytotoxicity median AC50 values for Xanthohumol and 3-Nitro-L-tyrosine are imputed as 3 (log10 uM).

# attach modl_acc, modl_ga and selectivity to every hitcall row (left join:
# rows without a matching mc.deiod record keep NA in the added columns)
long_hitc_final2 <- merge.data.table(long_hitc_final, 
                                     mc.deiod[, .(dsstox_substance_id, aenm, modl_acc, modl_ga, selectivity)], 
                                     by = c("dsstox_substance_id", "aenm"), 
                                     all.x = TRUE)

# use selectivity as a stricter threshold for hitcall determination 
long_hitc_final2[selectivity < 0.3, hitc := 0]

# merging hitcalls with class information
# NOTE(review): this merge starts from `long_hitc_final`, not from the
# selectivity-filtered `long_hitc_final2`, so the threshold applied above has
# no effect on `merged_dat` -- confirm that this is intended
merged_dat <- merge.data.table(long_hitc_final, 
                               classified_dt[, .(dsstox_substance_id, name, class_type, chosen_class)], 
                               by = c("dsstox_substance_id"), 
                               all.x = TRUE)

# short assay label (the names of h.deiod.aenm) for every endpoint name;
# a single vectorised lookup instead of one pass per assay
merged_dat[, assay := names(h.deiod.aenm)[match(aenm, h.deiod.aenm)]]

# total number of chems tested in each assay (before classification)
merged_dat[, Ntested := length(dsstox_substance_id), keyby = aenm]

# How many were classified in each assay? Expected: all of them
merged_dat[!is.na(chosen_class), .( Nclassified = length(dsstox_substance_id) ), 
           keyby = aenm]
# distribution of pos/neg chems with classifications for each assay
hitc.dist <- merged_dat[, .( numChems = length(dsstox_substance_id) ), 
           keyby = .(aenm, Ntested, hitc)]
# share of each hitcall value among the chemicals tested in that assay
hitc.dist[, hitrate := numChems / Ntested]
hitc.dist

All tested chemicals (100%) are classified under the new classification scheme.

Class Composition by Assay

The 25 largest classes (or groups of classes) in each assay's chemical library are shown here. Within each assay, classes that made up less than 1% of the tested library were regrouped into a broader, related “Other [classes from ClassyFire | Drugs | Expert chemicals | Pesticides | PFAS categories]” category.

# distribution of classes for test libraries 
# (number of chemicals per assay x class type x class)
counts.by.aenm <- merged_dat[, .( .N ), keyby = .(aenm, class_type, chosen_class)]

# sorted by counts from highest to lowest, grouped by aenm 
counts.by.aenm <- counts.by.aenm[order(-N), .SD, by = aenm]

# compute percentages per assay
counts.by.aenm[, total := sum(N), by = aenm]
counts.by.aenm[, fraction := N / total]

# for classes with < 1% of chems, group them as 'Other (class_type)'
other_rows <- counts.by.aenm[fraction < 0.01,
                             .( chosen_class = paste("Other", class_type, sep = " "),
                                N = sum(N),
                                total = total[1], # denominator stays the same 
                                fraction = sum(fraction)),
                             by = .(aenm, class_type)]

# relabel into broader categories
other_labels <- c(
  "Other ClassyFire" = "Other classes from ClassyFire",
  "Other Pesticide"  = "Other Pesticides",
  "Other Expert"     = "Other Expert chemicals",
  "Other Drug"       = "Other Drugs",
  "Other PFAS"       = "Other PFAS categories",
  "Other Color"      = "Other colors"
)
other_rows[chosen_class %in% names(other_labels),
           chosen_class := unname(other_labels[chosen_class])]

# replace the < 1% classes by their aggregated 'Other' rows and resort.
# The small classes are dropped here because they are already counted inside
# the 'Other' rows; keeping both would count those chemicals twice.
counts.by.aenm <- rbindlist(list(counts.by.aenm[fraction >= 0.01], other_rows),
                            use.names = TRUE)
counts.by.aenm <- counts.by.aenm[order(-N), .SD, keyby = aenm]

# specify hole size for donut chart 
hsize <- 3 
counts.by.aenm[, x := hsize]
counts.by.aenm$aeid <- deiod.meta$aeid[match(counts.by.aenm$aenm, deiod.meta$aenm)]

# collect ggplot objects for ggpubr
plot_list <- list()

# form palette for top 25 classes from each ae
# (head() is applied within every aenm group, so classes from all assays get a
# colour, not only those of the first assay in the table)
n.top <- 25
classes_to_plot <- counts.by.aenm[, head(.SD, n.top), by = aenm][, unique(chosen_class)]
n.classes <- length(classes_to_plot)

# 12 colours from Set3, topped up with HCL colours only when more are needed
n.extra <- max(n.classes - 12, 0)
class_pal <- brewer.pal(n = 12, name = "Set3")
if (n.extra > 0) {
  class_pal <- c(class_pal, qualitative_hcl(n.extra, palette = "set2"))
}
class_pal <- class_pal[seq_len(n.classes)]

# to maintain consistent class-to-color map across subplots 
counts.by.aenm$chosen_class <- as.factor(counts.by.aenm$chosen_class)
class_pal <- setNames(class_pal, classes_to_plot)

# one donut chart per assay, showing at most the n.top largest classes
for (abbrev in names(h.deiod.aeids)) {
  # head() never pads with NA rows when an assay has fewer than n.top classes
  top_counts <- head(counts.by.aenm[aeid == h.deiod.aeids[[abbrev]]], n.top)

  plot_list[[abbrev]] <- ggplot(top_counts, aes(x = hsize, y = fraction, fill = chosen_class)) +
    geom_col(color = 'black') +
    geom_label_repel(aes(label = N), 
                     size = 7, 
                     position = position_stack(vjust = 0.5),
                     show.legend = FALSE, 
                     force_pull = 10) +
    scale_fill_manual(values = class_pal) +
    xlim(c(0.2, hsize + 0.5)) +
    coord_polar(theta = "y") +
    theme_void() +
    ggtitle(abbrev) +
    theme(
      legend.title = element_text(size = 20), 
      legend.text = element_text(size = 20), 
      plot.title = element_text(size = 20)
    )
}

plot_list[["DIO1"]] <- plot_list[["DIO1"]] + guides(fill = guide_legend(nrow = 9, ncol = 3))

# combine the plots into one figure
# NOTE(review): verify that the shared legend also lists classes that appear
# only in the DIO2/DIO3/IYD panels
class.comp <- ggarrange(plot_list$DIO1, plot_list$IYD,
                    plot_list$DIO2, plot_list$DIO3, 
                    ncol = 2, nrow = 2, 
                    common.legend = TRUE, 
                    legend = 'bottom')
class.comp

# ggsave(plot = class.comp,
#        units = "in",
#        dpi = 72,
#        width = 20, height = 14,
#        device = "jpg",
#        filename = "./doc/comptox-thyroid-httk/figures/supp/DIO_IYD_chem_library_composition.jpg")

Class Enrichment

Run Fisher’s test for every assay x class

# hitcall matrix: keep only the human DIO/IYD endpoint columns of M.hitc
M.hitc2 <- M.hitc[, h.deiod.aenm] # take all assay columns 

# class matrix: cross-tabulate chemical ids against their chosen class, giving
# one row per chemical and one count column per class
M.class <- as.data.frame.matrix(
  table(classified_dt[["dsstox_substance_id"]],
        classified_dt[["chosen_class"]])
)

# check that rows are ordered the same 
identical(rownames(M.hitc2), rownames(M.class))
## [1] TRUE
# Run Fisher's exact test for every assay x class combination.
#
# Arguments:
#   M.hitc   : data frame or matrix with one row per chemical and one column
#              per assay; 1 = active, 0 = inactive, NA = not tested. Any other
#              value is reported with a warning and treated as not tested.
#   M.ident  : data frame or matrix of 0/1 class-membership indicators with
#              the same rows, in the same order, as M.hitc and one named
#              column per class.
#   cf_level : single string used to name the class column and the four count
#              columns of the output (e.g. 'group').
#   min_size : minimum class size (column sum of M.ident over ALL rows) for a
#              class to be tested; defaults to 3.
#
# Returns an unnamed list with one data frame per column of M.hitc (same
# order). Each data frame has one row per tested class and the columns
# Pval, Odds_Ratio, Lower, Upper (fisher.test() p-value, estimate and
# confidence interval), padj (FDR-adjusted p-value across the classes of that
# assay), <cf_level>, and the four cells of the 2x2 table. Rows are sorted by
# padj, then by decreasing odds ratio.
#
# Chemicals that were not tested in an assay are left out of that assay's
# 2x2 tables, so the counts of a class within one assay can add up to less
# than min_size.
RunFisherTests <- function(M.hitc, M.ident, cf_level, min_size = 3) {

  hit_mat <- as.matrix(M.hitc)
  ident_mat <- as.matrix(M.ident)
  stopifnot(
    is.numeric(hit_mat) || is.logical(hit_mat),
    nrow(hit_mat) == nrow(ident_mat),
    is.character(cf_level), length(cf_level) == 1L
  )

  # counting below relies on hitcalls being exactly 0 or 1; anything else
  # would end up in neither the active nor the inactive cell, so say so
  unexpected <- !is.na(hit_mat) & !(hit_mat %in% c(0, 1))
  if (any(unexpected)) {
    warning(sum(unexpected),
            " hitcall value(s) other than 0/1 were treated as not tested",
            call. = FALSE)
    hit_mat[unexpected] <- NA
  }

  # run tests only on classes of sufficient size (n >= min_size)
  class_sizes <- colSums(ident_mat)
  col.ind <- which(class_sizes >= min_size)

  ft.outputs <- c("Pval", "Odds_Ratio", "Lower", "Upper")
  ft.cells <- paste0(c('Actives in ', 'Inactives in ',
                       'Actives not in ', 'Inactives not in '), cf_level)

  # all class tests for a single assay (one hitcall column)
  test_one_assay <- function(hits) {
    is_active <- hits == 1
    is_inactive <- hits == 0

    dataft <- matrix(NA_real_, nrow = length(col.ind), ncol = 8,
                     dimnames = list(NULL, c(ft.outputs, ft.cells)))

    for (j in seq_along(col.ind)) {
      in_class <- ident_mat[, col.ind[[j]]] == 1

      # entries of the 2x2 table; NA (untested) chemicals are not counted
      n_act_in    <- sum(in_class & is_active, na.rm = TRUE)
      n_inact_in  <- sum(in_class & is_inactive, na.rm = TRUE)
      n_act_out   <- sum(!in_class & is_active, na.rm = TRUE)
      n_inact_out <- sum(!in_class & is_inactive, na.rm = TRUE)

      # rows: in class / not in class; columns: active / inactive
      fttf <- fisher.test(matrix(c(n_act_in, n_act_out,
                                   n_inact_in, n_inact_out), nrow = 2))

      dataft[j, ] <- c(fttf$p.value, fttf$estimate, fttf$conf.int,
                       n_act_in, n_inact_in, n_act_out, n_inact_out)
    }

    # construct the dataframe for this assay
    dataft <- data.frame(dataft, check.names = FALSE)
    dataft$padj <- p.adjust(dataft$Pval, method = "fdr")
    dataft[[cf_level]] <- names(col.ind)
    dataft <- dataft[, c(ft.outputs, "padj", cf_level, ft.cells), drop = FALSE]

    # reorder rows by most significant results, ties broken by larger OR
    dataft <- dataft[order(dataft$padj, -dataft$Odds_Ratio), , drop = FALSE]
    rownames(dataft) <- NULL
    dataft
  }

  lapply(seq_len(ncol(hit_mat)), function(i) test_one_assay(hit_mat[, i]))
}

# Fisher's test results: one data frame per assay column of M.hitc2, with the
# class column labelled 'group'
res <- RunFisherTests(
  M.hitc = M.hitc2,
  M.ident = M.class,
  cf_level = 'group'
)

DIO1

For DIO1
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0000231 42.7111064 5.1495406 1950.4429047 0.0033800 Estrogen flavonoid 6 1 240 1715
0.0000386 16.6762417 3.7765575 100.7212331 0.0033800 Quaternary ammonium salts 7 3 239 1713
0.0000826 21.3614515 3.7924053 216.9455955 0.0048163 Herbicide Chloroacetamide Very Long-Chain Fatty Acid Synthesis inhibitor 6 2 240 1714
0.0004964 10.6736721 2.5112253 51.8308497 0.0217192 Estrogen nonsteroidal 6 4 240 1712
0.0010899 28.2448681 2.7815095 1384.8677637 0.0317886 Antibiotic polyketide 4 1 242 1715
0.0010899 28.2448681 2.7815095 1384.8677637 0.0317886 Insecticide Arylalkyl organothiophosphate AChE Inhibitor 4 1 242 1715
0.0019501 Inf 2.8965972 Inf 0.0426586 Aryl chlorides 3 0 243 1716
0.0019501 Inf 2.8965972 Inf 0.0426586 Fungicide Phthalimide Multi-site activity 3 0 243 1716
0.0072497 5.9037294 1.4138688 23.4145767 0.1153357 Bisphenol 5 6 241 1710
0.0068402 4.7357105 1.3742904 15.0520307 0.1153357 Insecticide Aryl organothiophosphate AChE Inhibitor 6 9 240 1707
0.0070574 0.0000000 0.0000000 0.6466012 0.1153357 Carbonyl compounds 0 41 246 1675
0.0260963 0.0000000 0.0000000 0.9043474 0.3805717 Alcohols and polyols 0 30 246 1686
0.0430942 14.0219320 0.7274453 824.9556901 0.3969199 Anilides 2 1 244 1715
0.0430942 14.0219320 0.7274453 824.9556901 0.3969199 Lineolic acids and derivatives 2 1 244 1715
0.0430942 14.0219320 0.7274453 824.9556901 0.3969199 Organometallic Hg 2 1 244 1715
0.0430942 14.0219320 0.7274453 824.9556901 0.3969199 Retinoid 2 1 244 1715
0.0430942 14.0219320 0.7274453 824.9556901 0.3969199 Thyroid hormone 2 1 244 1715
0.0430942 14.0219320 0.7274453 824.9556901 0.3969199 Tryptase inhibitor 2 1 244 1715
0.0389029 4.0313648 0.8588912 15.9935326 0.3969199 Sulfuric acid esters 4 7 242 1709
0.0463707 5.2765205 0.7683196 31.4089393 0.4057435 PFAA precursors 3 4 243 1712
0.0527664 3.5251843 0.7709891 13.2794249 0.4397199 1-hydroxy-2-unsubstituted benzenoids 4 8 242 1708
0.0574597 2.2723631 0.8755678 5.2720346 0.4570660 PFAAs 8 25 238 1691
0.0791136 7.0123087 0.5061477 97.1838883 0.5768699 HMGCoA inhibitor 2 2 244 1714
0.0791136 7.0123087 0.5061477 97.1838883 0.5768699 PPARg agonist 2 2 244 1714
0.0978861 0.0000000 0.0000000 1.3339212 0.6852027 Fungicide Triazole Sterol Biosynthesis in Membranes 0 21 246 1695
0.1051428 1.6649982 0.8452681 3.0747380 0.7076916 Other aliphatics 14 60 232 1656
0.1211546 4.6745339 0.3886038 40.9669562 0.7562077 Antiviral 2 3 244 1713
0.1198537 3.0116558 0.4992440 13.3048425 0.7562077 Polymer unclassified 3 7 243 1709
0.1253144 1.9546020 0.7629369 4.4473285 0.7562077 Unclassed 8 29 238 1687
0.1671538 3.5045293 0.3154710 24.6013135 0.8807429 Benzothiazoles 2 4 244 1712
0.1671538 3.5045293 0.3154710 24.6013135 0.8807429 COX inhibitor 2 4 244 1712
0.1656632 0.2461696 0.0059969 1.5031031 0.8807429 Carboxylic acid derivatives 1 28 245 1688
0.1711158 0.2219552 0.0054252 1.3446220 0.8807429 Monoterpenoids 1 31 245 1685
0.1531528 0.0000000 0.0000000 1.5824389 0.8807429 Organosilicon compounds 0 18 246 1698
0.3311025 3.4943266 0.0590515 67.4562505 1.0000000 Benzenediols 1 2 245 1714
0.3311025 3.4943266 0.0590515 67.4562505 1.0000000 Benzenesulfonyl compounds 1 2 245 1714
0.3311025 3.4943266 0.0590515 67.4562505 1.0000000 Fungicide Anilinopyrimidine Amino Acid and Protein Synthesis 1 2 245 1714
0.3311025 3.4943266 0.0590515 67.4562505 1.0000000 Fungicide Imidazole Sterol Biosynthesis in Membranes 1 2 245 1714
0.3311025 3.4943266 0.0590515 67.4562505 1.0000000 Fungicide Thiocarbamate Multi-site activity 1 2 245 1714
0.3311025 3.4943266 0.0590515 67.4562505 1.0000000 Herbicide Triazolopyrimidine Acetolactate synthase inhibitor 1 2 245 1714
0.3311025 3.4943266 0.0590515 67.4562505 1.0000000 Phenanthrenes and derivatives 1 2 245 1714
0.3311025 3.4943266 0.0590515 67.4562505 1.0000000 Vinyl chlorides 1 2 245 1714
0.2154671 2.8028199 0.2655255 17.2318761 1.0000000 Antifungal 2 5 244 1711
0.2154671 2.8028199 0.2655255 17.2318761 1.0000000 Azobenzenes 2 5 244 1711
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Adrenergic beta agonist 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Androgen nonsteroidal antagonist 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Benzofuranones 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Biphenyls and derivatives 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 FASA based PFAA precursors 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Glyceryl 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Halopyridines 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Herbicide N-Phenylimide Protoporphyrinogen Oxidase inhibitor 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Phenylmethylamines 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Serotonin receptor antagonist 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Side-chain aromatics 1 3 245 1713
0.4150988 2.3292572 0.0442163 29.1400687 1.0000000 Triazoles 1 3 245 1713
0.3033204 1.7555792 0.4236236 5.5029800 1.0000000 Fatty acids and conjugates 4 16 242 1700
0.4885849 1.7463273 0.0353301 17.7357118 1.0000000 Pyrrolidones 1 4 245 1712
0.2927271 1.5634650 0.5223738 3.9186390 1.0000000 Phenylpropanes 6 27 240 1689
0.5528711 1.3964615 0.0294122 12.5543164 1.0000000 Androgen 1 5 245 1711
0.5528711 1.3964615 0.0294122 12.5543164 1.0000000 Antifungal paraben 1 5 245 1711
0.5528711 1.3964615 0.0294122 12.5543164 1.0000000 Cumenes 1 5 245 1711
0.5528711 1.3964615 0.0294122 12.5543164 1.0000000 Diphenylethers 1 5 245 1711
0.5528711 1.3964615 0.0294122 12.5543164 1.0000000 Insecticide Organophosphate AChE Inhibitor 1 5 245 1711
0.5528711 1.3964615 0.0294122 12.5543164 1.0000000 Isoindolines 1 5 245 1711
0.5528711 1.3964615 0.0294122 12.5543164 1.0000000 Naphthols and derivatives 1 5 245 1711
0.6729907 1.2703218 0.1360581 5.8721892 1.0000000 Insecticide Carbamate AChE Inhibitor 2 11 244 1705
0.6921051 1.1638247 0.1257564 5.2750657 1.0000000 Amino acids, peptides, and analogues 2 12 244 1704
1.0000000 1.1631568 0.0251881 9.6495391 1.0000000 Benzophenones 1 6 245 1710
1.0000000 1.1631568 0.0251881 9.6495391 1.0000000 Insecticide Organochlorine Sodium channel modulator 1 6 245 1710
1.0000000 0.9965032 0.0220218 7.8100757 1.0000000 Benzenesulfonic acids and derivatives 1 7 245 1709
1.0000000 0.9965032 0.0220218 7.8100757 1.0000000 Herbicide Dinitroaniline Microtubule assembly inhibitor 1 7 245 1709
1.0000000 0.9964889 0.1092701 4.3796481 1.0000000 Insecticide Pyrethroid Sodium channel modulator 2 14 244 1702
1.0000000 0.9295322 0.1025357 4.0358158 1.0000000 Nitrobenzenes 2 15 244 1701
1.0000000 0.8714592 0.0195600 6.5474555 1.0000000 Halophenols 1 8 245 1708
1.0000000 0.8714592 0.0195600 6.5474555 1.0000000 Herbicide Aryloxyphenoxypropionic Acetyl CoA carboxylase inhibitor 1 8 245 1708
1.0000000 0.8714592 0.0195600 6.5474555 1.0000000 Insecticide Chlorinated cyclodiene GABA-gated chloride channel blocker 1 8 245 1708
0.8112857 0.7501553 0.1925318 2.1152422 1.0000000 Benzoic acids and derivatives 4 37 242 1679
1.0000000 0.6964391 0.0159808 4.9334381 1.0000000 Cresols 1 10 245 1706
1.0000000 0.6964391 0.0159808 4.9334381 1.0000000 Insecticide Aliphatic organothiophosphate AChE Inhibitor 1 10 245 1706
0.7923622 0.6711660 0.1303103 2.1769729 1.0000000 Fatty acid esters 3 31 243 1685
0.7606135 0.6034797 0.0685457 2.4677036 1.0000000 Pharmaceutical unknown MOA 2 23 244 1693
1.0000000 0.5348297 0.0125344 3.5905461 1.0000000 Phosphate esters 1 13 245 1703
0.7095704 0.4963472 0.0116914 3.2899279 1.0000000 Dicarboxylic acids and derivatives 1 14 245 1702
0.5743186 0.4942833 0.0567131 1.9810519 1.0000000 Amines 2 28 244 1688
0.4982029 0.3851650 0.0092075 2.4611797 1.0000000 Halobenzenes 1 18 245 1698
0.4994651 0.3646829 0.0087418 2.3146946 1.0000000 Toluenes 1 19 245 1697
0.3472809 0.3144065 0.0075880 1.9629541 1.0000000 Fluorotelomer PFAA precursors 1 22 245 1694
0.3472809 0.3144065 0.0075880 1.9629541 1.0000000 Phthalate 1 22 245 1694
0.3497980 0.3005618 0.0072675 1.8680690 1.0000000 Fatty alcohols 1 23 245 1693
0.3550796 0.2878706 0.0069727 1.7817217 1.0000000 Benzene and substituted derivatives 1 24 245 1692
0.3550796 0.2878706 0.0069727 1.7817217 1.0000000 Ethers 1 24 245 1692
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 1,3,5-triazines 0 4 246 1712
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Alkaloid 0 3 246 1713
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Alkanes 0 4 246 1712
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Alkyl chlorides 0 3 246 1713
1.0000000 0.0000000 0.0000000 5.9389670 1.0000000 Alpha-halocarboxylic acids and derivatives 0 6 246 1710
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Androgen antagonist 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Anesthetic 0 3 246 1713
1.0000000 0.0000000 0.0000000 5.9389670 1.0000000 Anisoles 0 6 246 1710
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Anthracenes 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Anthraquinones 0 3 246 1713
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Antibiotic 0 4 246 1712
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Aromatase inhibitor 0 4 246 1712
0.6063620 0.0000000 0.0000000 4.8505601 1.0000000 Aromatic oil 0 7 246 1709
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Benzoic acid 0 3 246 1713
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Benzyloxycarbonyls 0 4 246 1712
0.6063620 0.0000000 0.0000000 4.8505601 1.0000000 Cannabinoid receptor antagonist 0 7 246 1709
1.0000000 0.0000000 0.0000000 5.9389670 1.0000000 Carbohydrates and carbohydrate conjugates 0 6 246 1710
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Chlorohydrins 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Cinnamaldehydes 0 3 246 1713
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Cinnamic acids and derivatives 0 4 246 1712
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Corticosteroid 0 4 246 1712
0.6063620 0.0000000 0.0000000 4.8505601 1.0000000 Coumarin 0 7 246 1709
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Depsides and depsidones 0 3 246 1713
0.3910531 0.0000000 0.0000000 2.2885318 1.0000000 Diphenylmethanes 0 13 246 1703
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 DNA synthesis inhibitor 0 3 246 1713
1.0000000 0.0000000 0.0000000 37.1917505 1.0000000 Dopamine receptor agonist 0 2 246 1714
0.6063620 0.0000000 0.0000000 4.8505601 1.0000000 Estrogen 0 7 246 1709
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Fatty alcohol esters 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Fibrate 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Fungicide Benzimidazole Cytoskeleton and motor protein inhibitor 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Fungicide Dicarboximide Signal Transduction 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Fungicide Phenylbenzamide Respiration 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Fungicide Polychloroaromatic Lipid Synthesis or Transport / Membrane Integrity or Function 0 3 246 1713
0.6065058 0.0000000 0.0000000 4.0945622 1.0000000 Gamma butyrolactones 0 8 246 1708
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Glycogen phosphorylase inhibitor 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Herbicide Cyclohexanedione oxime Acetyl CoA carboxylase inhibitor 0 3 246 1713
1.0000000 0.0000000 0.0000000 7.6317715 1.0000000 Herbicide Diphenyl ether Protoporphyrinogen Oxidase inhibitor 0 5 246 1711
1.0000000 0.0000000 0.0000000 5.9389670 1.0000000 Herbicide Imidazolinone Acetolactate synthase inhibitor 0 6 246 1710
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Herbicide Phenoxyacetic Auxin mimic 0 4 246 1712
1.0000000 0.0000000 0.0000000 7.6317715 1.0000000 Herbicide Phenylurea Inhbition of photosynthesis at PSll 0 5 246 1711
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Herbicide Pyridyloxycarboxylic acid Auxin mimic 0 4 246 1712
0.6065058 0.0000000 0.0000000 4.0945622 1.0000000 Herbicide Thiocarbamate Very Long-Chain Fatty Acid Synthesis inhibitor 0 8 246 1708
0.6065058 0.0000000 0.0000000 4.0945622 1.0000000 Herbicide Triazine Inhibition of photosynthesis at PSll 0 8 246 1708
0.3820874 0.0000000 0.0000000 2.5113465 1.0000000 Imidazolidines 0 12 246 1704
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Insecticide Benzoylurea Chitin biosynthesis inhibitor 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Insecticide Diacylhydrazine Ecdysone receptor agonist 0 3 246 1713
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Insecticide Nitroguanidine NACHR Competative Modulator 0 4 246 1712
1.0000000 0.0000000 0.0000000 7.6317715 1.0000000 Insecticide Phosphoramidate AChE Inhibitor 0 5 246 1711
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Kinase inhibitor 0 4 246 1712
1.0000000 0.0000000 0.0000000 37.1917505 1.0000000 Methoxybenzenes 0 2 246 1714
1.0000000 0.0000000 0.0000000 5.9389670 1.0000000 Methoxyphenols 0 6 246 1710
1.0000000 0.0000000 0.0000000 7.6317715 1.0000000 Morpholines 0 5 246 1711
0.6237717 0.0000000 0.0000000 3.1155696 1.0000000 Naphthalene sulfonic acids and derivatives 0 10 246 1706
0.3910531 0.0000000 0.0000000 2.2885318 1.0000000 Naphthalenes 0 13 246 1703
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Neurokinin receptor antagonist 0 4 246 1712
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Nitrophenols 0 3 246 1713
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Organic carbonic acids and derivatives 0 3 246 1713
0.6065058 0.0000000 0.0000000 4.0945622 1.0000000 Organic cyanides 0 8 246 1708
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Organic metalloid salts 0 4 246 1712
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Organic nitro compounds 0 3 246 1713
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Organic nitroso compounds 0 4 246 1712
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Organobromides 0 3 246 1713
0.6065058 0.0000000 0.0000000 4.0945622 1.0000000 Organometallic Sn 0 8 246 1708
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Organonitrogen compounds 0 4 246 1712
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Organosulfonic acids and derivatives 0 3 246 1713
0.6063620 0.0000000 0.0000000 4.8505601 1.0000000 Phenol ethers 0 7 246 1709
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Phenoxy compounds 0 3 246 1713
0.6063620 0.0000000 0.0000000 4.8505601 1.0000000 Phenoxyacetic acid derivatives 0 7 246 1709
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Phenylhydrazines 0 3 246 1713
1.0000000 0.0000000 0.0000000 7.6317715 1.0000000 Phosphodiesterase inhibitor 0 5 246 1711
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Phosphonic acid diesters 0 3 246 1713
0.6063620 0.0000000 0.0000000 4.8505601 1.0000000 Progestogen 0 7 246 1709
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Pyranones and derivatives 0 4 246 1712
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Pyrazoles 0 4 246 1712
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Pyridinecarboxylic acids and derivatives 0 4 246 1712
0.6065058 0.0000000 0.0000000 4.0945622 1.0000000 Sesquiterpenoids 0 8 246 1708
1.0000000 0.0000000 0.0000000 5.9389670 1.0000000 Silicon PFASs 0 6 246 1710
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Sodium channel inhibitor 0 3 246 1713
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Tetracarboxylic acids and derivatives 0 4 246 1712
1.0000000 0.0000000 0.0000000 10.5932345 1.0000000 Thioureas 0 4 246 1712
1.0000000 0.0000000 0.0000000 5.9389670 1.0000000 Triazinones 0 6 246 1710
0.6063620 0.0000000 0.0000000 4.8505601 1.0000000 Vitamin B 0 7 246 1709
1.0000000 0.0000000 0.0000000 16.9121965 1.0000000 Xylenes 0 3 246 1713

DIO2

For DIO2
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0000237 35.5383595 4.5380638 1591.5876342 0.0020731 Herbicide Chloroacetamide Very Long-Chain Fatty Acid Synthesis inhibitor 7 1 321 1633
0.0000187 20.3484965 4.0356747 197.1556111 0.0020731 Estrogen nonsteroidal 8 2 320 1632
0.0002609 11.8319707 2.6840144 71.2785633 0.0152194 Quaternary ammonium salts 7 3 321 1631
0.0006151 8.8705109 2.2398629 41.5664715 0.0269114 Bisphenol 7 4 321 1630
0.0046367 Inf 2.0658544 Inf 0.0901575 Cinnamaldehydes 3 0 325 1634
0.0046367 Inf 2.0658544 Inf 0.0901575 Fungicide Phthalimide Multi-site activity 3 0 325 1634
0.0046367 Inf 2.0658544 Inf 0.0901575 Fungicide Thiocarbamate Multi-site activity 3 0 325 1634
0.0046367 Inf 2.0658544 Inf 0.0901575 Retinoid 3 0 325 1634
0.0045821 8.4013791 1.6254644 54.3339076 0.0901575 Organometallic Sn 5 3 323 1631
0.0058732 0.0000000 0.0000000 0.6442248 0.1027815 Alcohols and polyols 0 30 328 1604
0.0145901 0.0000000 0.0000000 0.7851724 0.2321157 Ethers 0 25 328 1609
0.0166094 2.5411843 1.1009965 5.5324058 0.2422209 PFAAs 11 22 317 1612
0.0244546 4.1957891 1.0065655 16.6110128 0.3291963 Sulfuric acid esters 5 6 323 1628
0.0355920 7.5200279 0.8579640 90.4924956 0.4449004 Antibiotic polyketide 3 2 325 1632
0.0743575 10.0013972 0.5191722 589.0165789 0.5421898 Aryl chlorides 2 1 326 1633
0.0743575 10.0013972 0.5191722 589.0165789 0.5421898 Fungicide Imidazole Sterol Biosynthesis in Membranes 2 1 326 1633
0.0743575 10.0013972 0.5191722 589.0165789 0.5421898 Lineolic acids and derivatives 2 1 326 1633
0.0743575 10.0013972 0.5191722 589.0165789 0.5421898 Organometallic Hg 2 1 326 1633
0.0743575 10.0013972 0.5191722 589.0165789 0.5421898 Tryptase inhibitor 2 1 326 1633
0.0624905 5.0124666 0.6683670 37.5891629 0.5421898 Insecticide Organophosphate AChE Inhibitor 3 3 325 1631
0.0546515 1.7646789 0.9746905 3.0705207 0.5421898 Other aliphatics 19 55 309 1579
0.0737918 0.1754905 0.0042803 1.0693795 0.5421898 Carboxylic acid derivatives 1 28 327 1606
0.0474688 0.1693344 0.0041352 1.0289678 0.5421898 Amines 1 29 327 1605
0.0570147 0.0000000 0.0000000 1.1277798 0.5421898 Organosilicon compounds 0 18 328 1616
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Adrenergic beta agonist 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Benzofuranones 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Biphenyls and derivatives 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Cinnamic acids and derivatives 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Glyceryl 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Halopyridines 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 HMGCoA inhibitor 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Kinase inhibitor 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Neurokinin receptor antagonist 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 PPARg agonist 2 2 326 1632
0.1324759 5.0001685 0.3612311 69.3203126 0.5944431 Serotonin receptor antagonist 2 2 326 1632
0.0961341 3.7579305 0.5479218 22.3250931 0.5944431 Azobenzenes 3 4 325 1630
0.1256308 2.5077119 0.5492858 9.4272436 0.5944431 1-hydroxy-2-unsubstituted benzenoids 4 8 324 1626
0.1272028 2.1551243 0.6735971 6.0246234 0.5944431 Fatty acids and conjugates 6 14 322 1620
0.1150900 1.8708692 0.7997926 4.0368078 0.5944431 Unclassed 10 27 318 1607
0.1363603 0.3878256 0.0761295 1.2339593 0.5965764 Carbonyl compounds 3 38 325 1596
0.1571039 0.2241779 0.0054166 1.3969231 0.6705652 Phthalate 1 22 327 1612
0.1665823 2.2827619 0.6175169 7.1850906 0.6940931 Insecticide Pyrethroid Sodium channel modulator 5 11 323 1623
0.1970651 3.3325275 0.2773229 29.2144241 0.8020093 Insecticide Arylalkyl organothiophosphate AChE Inhibitor 2 3 326 1631
0.2258422 2.1444670 0.3559891 9.4564320 0.8946589 Polymer unclassified 3 7 325 1627
0.2300552 0.2746652 0.0065732 1.7518296 0.8946589 Halobenzenes 1 18 327 1616
0.2362145 0.0000000 0.0000000 1.7902528 0.8986423 Imidazolidines 0 12 328 1622
0.2643525 2.4984709 0.2251265 17.5144369 0.9441160 Antifungal paraben 2 4 326 1630
0.2643525 2.4984709 0.2251265 17.5144369 0.9441160 Naphthols and derivatives 2 4 326 1630
0.2553962 1.7717604 0.5675161 4.7552743 0.9441160 Fluorotelomer PFAA precursors 6 17 322 1617
0.2711131 2.0041341 0.4559759 7.0022720 0.9488959 Amino acids, peptides, and analogues 4 10 324 1624
0.2962998 1.8208781 0.4202392 6.1937736 0.9971629 Insecticide Aryl organothiophosphate AChE Inhibitor 4 11 324 1623
0.2921177 0.5330098 0.1370522 1.4986981 0.9971629 Benzoic acids and derivatives 4 37 324 1597
0.4225349 2.4938888 0.0421695 48.0257439 1.0000000 Anilides 1 2 327 1632
0.4225349 2.4938888 0.0421695 48.0257439 1.0000000 Anthracenes 1 2 327 1632
0.4225349 2.4938888 0.0421695 48.0257439 1.0000000 Benzenesulfonyl compounds 1 2 327 1632
0.4225349 2.4938888 0.0421695 48.0257439 1.0000000 Depsides and depsidones 1 2 327 1632
0.4225349 2.4938888 0.0421695 48.0257439 1.0000000 Fungicide Anilinopyrimidine Amino Acid and Protein Synthesis 1 2 327 1632
0.4225349 2.4938888 0.0421695 48.0257439 1.0000000 Phenanthrenes and derivatives 1 2 327 1632
0.4225349 2.4938888 0.0421695 48.0257439 1.0000000 Vinyl chlorides 1 2 327 1632
0.3316398 1.9979807 0.1894720 12.2686000 1.0000000 Antifungal 2 5 326 1629
0.6278200 1.6641098 0.1635553 9.3596311 1.0000000 Herbicide Dinitroaniline Microtubule assembly inhibitor 2 6 326 1628
0.6278200 1.6641098 0.1635553 9.3596311 1.0000000 Sesquiterpenoids 2 6 326 1628
0.5192213 1.6620789 0.0315745 20.7722908 1.0000000 Antibiotic 1 3 327 1631
0.5192213 1.6620789 0.0315745 20.7722908 1.0000000 FASA based PFAA precursors 1 3 327 1631
0.5192213 1.6620789 0.0315745 20.7722908 1.0000000 Herbicide N-Phenylimide Protoporphyrinogen Oxidase inhibitor 1 3 327 1631
0.5192213 1.6620789 0.0315745 20.7722908 1.0000000 Phenylmethylamines 1 3 327 1631
0.5192213 1.6620789 0.0315745 20.7722908 1.0000000 Triazoles 1 3 327 1631
0.6513669 1.4256511 0.1438593 7.5325946 1.0000000 Herbicide Aryloxyphenoxypropionic Acetyl CoA carboxylase inhibitor 2 7 326 1627
0.6513669 1.4256511 0.1438593 7.5325946 1.0000000 Insecticide Chlorinated cyclodiene GABA-gated chloride channel blocker 2 7 326 1627
1.0000000 1.2460267 0.0252281 12.6479020 1.0000000 Antiviral 1 4 327 1630
1.0000000 1.2460267 0.0252281 12.6479020 1.0000000 Insecticide Phosphoramidate AChE Inhibitor 1 4 327 1630
1.0000000 1.2460267 0.0252281 12.6479020 1.0000000 Pyrrolidones 1 4 327 1630
1.0000000 0.9963321 0.0210017 8.9466070 1.0000000 Androgen 1 5 327 1629
1.0000000 0.9963321 0.0210017 8.9466070 1.0000000 Benzothiazoles 1 5 327 1629
1.0000000 0.9963321 0.0210017 8.9466070 1.0000000 COX inhibitor 1 5 327 1629
1.0000000 0.9963321 0.0210017 8.9466070 1.0000000 Diphenylethers 1 5 327 1629
1.0000000 0.9963321 0.0210017 8.9466070 1.0000000 Isoindolines 1 5 327 1629
1.0000000 0.9963321 0.0210017 8.9466070 1.0000000 Methoxyphenols 1 5 327 1629
1.0000000 0.9482780 0.2350987 2.8366733 1.0000000 Benzene and substituted derivatives 4 21 324 1613
1.0000000 0.9213472 0.2749960 2.4519041 1.0000000 Monoterpenoids 5 27 323 1607
1.0000000 0.9052541 0.0970575 4.1765945 1.0000000 Naphthalenes 2 11 326 1623
1.0000000 0.8780650 0.1639140 3.0610979 1.0000000 Toluenes 3 17 325 1617
1.0000000 0.8298401 0.0179850 6.8747128 1.0000000 Aromatic oil 1 6 327 1628
1.0000000 0.8298401 0.0179850 6.8747128 1.0000000 Estrogen flavonoid 1 6 327 1628
1.0000000 0.8298401 0.0179850 6.8747128 1.0000000 Insecticide Organochlorine Sodium channel modulator 1 6 327 1628
1.0000000 0.8293182 0.0897367 3.7516562 1.0000000 Phosphate esters 2 12 326 1622
1.0000000 0.7650801 0.0834362 3.4039751 1.0000000 Dicarboxylic acids and derivatives 2 13 326 1621
1.0000000 0.7109025 0.0157271 5.5630154 1.0000000 Benzenesulfonic acids and derivatives 1 7 327 1627
1.0000000 0.7109025 0.0157271 5.5630154 1.0000000 Organic cyanides 1 7 327 1627
0.6392052 0.6833889 0.1733977 1.9651489 1.0000000 Phenylpropanes 4 29 324 1605
0.7535498 0.6622911 0.0731470 2.8692926 1.0000000 Nitrobenzenes 2 15 326 1619
1.0000000 0.6216904 0.0139689 4.6633706 1.0000000 Halophenols 1 8 327 1626
1.0000000 0.5522974 0.0125629 4.0092876 1.0000000 Naphthalene sulfonic acids and derivatives 1 9 327 1625
0.5584758 0.5216152 0.0586390 2.1790053 1.0000000 Fungicide Triazole Sterol Biosynthesis in Membranes 2 19 326 1615
0.7028505 0.4967791 0.0114128 3.5133956 1.0000000 Cresols 1 10 327 1624
0.7028505 0.4967791 0.0114128 3.5133956 1.0000000 Insecticide Aliphatic organothiophosphate AChE Inhibitor 1 10 327 1624
0.3500164 0.4774653 0.0928492 1.5447851 1.0000000 Fatty acid esters 3 31 325 1603
0.4086068 0.4496622 0.0510158 1.8440247 1.0000000 Fatty alcohols 2 22 326 1612
0.4142846 0.4298503 0.0488919 1.7537226 1.0000000 Pharmaceutical unknown MOA 2 23 326 1611
0.7076828 0.4134887 0.0096439 2.8125898 1.0000000 Diphenylmethanes 1 12 327 1622
0.7076828 0.4134887 0.0096439 2.8125898 1.0000000 Insecticide Carbamate AChE Inhibitor 1 12 327 1622
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 1,3,5-triazines 0 4 328 1630
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Alkaloid 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Alkanes 0 4 328 1630
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Alkyl chlorides 0 3 328 1631
0.5975106 0.0000000 0.0000000 4.2359085 1.0000000 Alpha-halocarboxylic acids and derivatives 0 6 328 1628
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Androgen antagonist 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Androgen nonsteroidal antagonist 0 4 328 1630
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Anesthetic 0 3 328 1631
0.5975106 0.0000000 0.0000000 4.2359085 1.0000000 Anisoles 0 6 328 1628
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Anthraquinones 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Aromatase inhibitor 0 4 328 1630
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Benzenediols 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Benzoic acid 0 3 328 1631
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 Benzophenones 0 7 328 1627
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Benzyloxycarbonyls 0 4 328 1630
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 Cannabinoid receptor antagonist 0 7 328 1627
0.5975106 0.0000000 0.0000000 4.2359085 1.0000000 Carbohydrates and carbohydrate conjugates 0 6 328 1628
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Chlorohydrins 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Corticosteroid 0 4 328 1630
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 Coumarin 0 7 328 1627
0.5975106 0.0000000 0.0000000 4.2359085 1.0000000 Cumenes 0 6 328 1628
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 DNA synthesis inhibitor 0 3 328 1631
1.0000000 0.0000000 0.0000000 26.5592830 1.0000000 Dopamine receptor agonist 0 2 328 1632
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 Estrogen 0 7 328 1627
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Fatty alcohol esters 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Fibrate 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Fungicide Benzimidazole Cytoskeleton and motor protein inhibitor 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Fungicide Dicarboximide Signal Transduction 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Fungicide Phenylbenzamide Respiration 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Fungicide Polychloroaromatic Lipid Synthesis or Transport / Membrane Integrity or Function 0 3 328 1631
0.3661738 0.0000000 0.0000000 2.9196968 1.0000000 Gamma butyrolactones 0 8 328 1626
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Glycogen phosphorylase inhibitor 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Herbicide Cyclohexanedione oxime Acetyl CoA carboxylase inhibitor 0 3 328 1631
0.5973048 0.0000000 0.0000000 5.4439403 1.0000000 Herbicide Diphenyl ether Protoporphyrinogen Oxidase inhibitor 0 5 328 1629
0.5975106 0.0000000 0.0000000 4.2359085 1.0000000 Herbicide Imidazolinone Acetolactate synthase inhibitor 0 6 328 1628
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Herbicide Phenoxyacetic Auxin mimic 0 4 328 1630
0.5973048 0.0000000 0.0000000 5.4439403 1.0000000 Herbicide Phenylurea Inhbition of photosynthesis at PSll 0 5 328 1629
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Herbicide Pyridyloxycarboxylic acid Auxin mimic 0 4 328 1630
0.3661738 0.0000000 0.0000000 2.9196968 1.0000000 Herbicide Thiocarbamate Very Long-Chain Fatty Acid Synthesis inhibitor 0 8 328 1626
0.3661738 0.0000000 0.0000000 2.9196968 1.0000000 Herbicide Triazine Inhibition of photosynthesis at PSll 0 8 328 1626
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Herbicide Triazolopyrimidine Acetolactate synthase inhibitor 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Insecticide Benzoylurea Chitin biosynthesis inhibitor 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Insecticide Diacylhydrazine Ecdysone receptor agonist 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Insecticide Nitroguanidine NACHR Competative Modulator 0 4 328 1630
1.0000000 0.0000000 0.0000000 26.5592830 1.0000000 Methoxybenzenes 0 2 328 1632
0.5973048 0.0000000 0.0000000 5.4439403 1.0000000 Morpholines 0 5 328 1629
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Nitrophenols 0 3 328 1631
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Organic carbonic acids and derivatives 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Organic metalloid salts 0 4 328 1630
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Organic nitro compounds 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Organic nitroso compounds 0 4 328 1630
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Organobromides 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Organonitrogen compounds 0 4 328 1630
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Organosulfonic acids and derivatives 0 3 328 1631
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 PFAA precursors 0 7 328 1627
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 Phenol ethers 0 7 328 1627
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Phenoxy compounds 0 3 328 1631
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 Phenoxyacetic acid derivatives 0 7 328 1627
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Phenylhydrazines 0 3 328 1631
0.5973048 0.0000000 0.0000000 5.4439403 1.0000000 Phosphodiesterase inhibitor 0 5 328 1629
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Phosphonic acid diesters 0 3 328 1631
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 Progestogen 0 7 328 1627
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Pyranones and derivatives 0 4 328 1630
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Pyrazoles 0 4 328 1630
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Pyridinecarboxylic acids and derivatives 0 4 328 1630
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Side-chain aromatics 0 4 328 1630
0.5975106 0.0000000 0.0000000 4.2359085 1.0000000 Silicon PFASs 0 6 328 1628
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Sodium channel inhibitor 0 3 328 1631
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Tetracarboxylic acids and derivatives 0 4 328 1630
1.0000000 0.0000000 0.0000000 7.5578204 1.0000000 Thioureas 0 4 328 1630
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Thyroid hormone 0 3 328 1631
0.5975106 0.0000000 0.0000000 4.2359085 1.0000000 Triazinones 0 6 328 1628
0.6089309 0.0000000 0.0000000 3.4592427 1.0000000 Vitamin B 0 7 328 1627
1.0000000 0.0000000 0.0000000 12.0731320 1.0000000 Xylenes 0 3 328 1631

DIO3

For DIO3
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0000258 35.0155107 4.4714250 1568.3849512 0.0022538 Herbicide Chloroacetamide Very Long-Chain Fatty Acid Synthesis inhibitor 7 1 325 1629
0.0000258 35.0155107 4.4714250 1568.3849512 0.0022538 Herbicide Triazine Inhibition of photosynthesis at PSll 7 1 325 1629
0.0002825 11.6580174 2.6443576 70.1918987 0.0164770 Quaternary ammonium salts 7 3 325 1627
0.0006646 8.7402222 2.2071299 40.9936493 0.0290752 Bisphenol 7 4 325 1626
0.0048089 Inf 2.0358336 Inf 0.0935067 Cinnamaldehydes 3 0 329 1630
0.0048089 Inf 2.0358336 Inf 0.0935067 Fungicide Phthalimide Multi-site activity 3 0 329 1630
0.0048089 Inf 2.0358336 Inf 0.0935067 Fungicide Thiocarbamate Multi-site activity 3 0 329 1630
0.0048089 Inf 2.0358336 Inf 0.0935067 Retinoid 3 0 329 1630
0.0034970 19.8172136 1.9532697 973.2823962 0.0935067 Antibiotic polyketide 4 1 328 1629
0.0058108 0.0000000 0.0000000 0.6348346 0.1016884 Alcohols and polyols 0 30 332 1600
0.0144268 0.0000000 0.0000000 0.7737403 0.2295177 Ethers 0 25 332 1605
0.0162228 4.9635409 1.1353425 21.6981396 0.2365830 Estrogen nonsteroidal 5 5 327 1625
0.0319772 4.9514576 0.9173501 26.7263933 0.4304621 Sesquiterpenoids 4 4 328 1626
0.0364053 0.0000000 0.0000000 0.9367034 0.4550663 Fungicide Triazole Sterol Biosynthesis in Membranes 0 21 332 1609
0.0500416 3.9594490 0.7813475 18.4967236 0.5473302 Insecticide Chlorinated cyclodiene GABA-gated chloride channel blocker 4 5 328 1625
0.0473808 0.1729384 0.0042183 1.0537561 0.5473302 Carboxylic acid derivatives 1 28 331 1602
0.0567660 0.0000000 0.0000000 1.1113794 0.5843561 Organosilicon compounds 0 18 332 1612
0.0760683 9.8548166 0.5116482 580.4900579 0.6050890 Aryl chlorides 2 1 330 1629
0.0760683 9.8548166 0.5116482 580.4900579 0.6050890 Depsides and depsidones 2 1 330 1629
0.0760683 9.8548166 0.5116482 580.4900579 0.6050890 Lineolic acids and derivatives 2 1 330 1629
0.0760683 9.8548166 0.5116482 580.4900579 0.6050890 Tryptase inhibitor 2 1 330 1629
0.0644797 4.9394480 0.6586628 37.0397871 0.6050890 Naphthols and derivatives 3 3 329 1627
0.0990325 3.7031904 0.5399719 21.9987249 0.6956864 Azobenzenes 3 4 329 1626
0.0990325 3.7031904 0.5399719 21.9987249 0.6956864 Progestogen 3 4 329 1626
0.0993838 2.8253810 0.6030306 11.1867656 0.6956864 Sulfuric acid esters 4 7 328 1623
0.1353239 4.9275114 0.3559961 68.3007202 0.7400524 Benzofuranones 2 2 330 1628
0.1353239 4.9275114 0.3559961 68.3007202 0.7400524 Cinnamic acids and derivatives 2 2 330 1628
0.1353239 4.9275114 0.3559961 68.3007202 0.7400524 Glyceryl 2 2 330 1628
0.1353239 4.9275114 0.3559961 68.3007202 0.7400524 HMGCoA inhibitor 2 2 330 1628
0.1353239 4.9275114 0.3559961 68.3007202 0.7400524 PPARg agonist 2 2 330 1628
0.1300344 2.4710014 0.5413081 9.2872667 0.7400524 1-hydroxy-2-unsubstituted benzenoids 4 8 328 1622
0.1308264 2.1234595 0.6637476 5.9355552 0.7400524 Fatty acids and conjugates 6 14 326 1616
0.1551174 2.4755285 0.6594501 8.0120251 0.7882308 Insecticide Aryl organothiophosphate AChE Inhibitor 5 10 327 1620
0.1576462 0.2209199 0.0053382 1.3765264 0.7882308 Fluorotelomer PFAA precursors 1 22 331 1608
0.1576462 0.2209199 0.0053382 1.3765264 0.7882308 Phthalate 1 22 331 1608
0.2010154 3.2841103 0.2733031 28.7903202 0.9507485 Antiviral 2 3 330 1627
0.2010154 3.2841103 0.2733031 28.7903202 0.9507485 Insecticide Arylalkyl organothiophosphate AChE Inhibitor 2 3 330 1627
0.2148047 0.3468821 0.0398537 1.3870130 0.9815311 Amines 2 28 330 1602
0.2290132 0.2706752 0.0064780 1.7262255 0.9815311 Halobenzenes 1 18 331 1612
0.2299587 0.2562727 0.0061503 1.6233819 0.9815311 Toluenes 1 19 331 1611
0.2287691 0.0000000 0.0000000 1.9542532 0.9815311 Insecticide Aliphatic organothiophosphate AChE Inhibitor 0 11 332 1619
0.3098681 4.9156473 0.0625087 385.2996888 1.0000000 Dopamine receptor agonist 1 1 331 1629
0.2692832 2.4620095 0.2218629 17.2591161 1.0000000 Antifungal paraben 2 4 330 1626
0.2692832 2.4620095 0.2218629 17.2591161 1.0000000 Benzothiazoles 2 4 330 1626
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Androgen antagonist 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Anthracenes 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Benzenesulfonyl compounds 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Fungicide Anilinopyrimidine Amino Acid and Protein Synthesis 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Fungicide Imidazole Sterol Biosynthesis in Membranes 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Fungicide Polychloroaromatic Lipid Synthesis or Transport / Membrane Integrity or Function 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Insecticide Diacylhydrazine Ecdysone receptor agonist 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Organometallic Hg 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Phenanthrenes and derivatives 1 2 331 1628
0.4267680 2.4579132 0.0415593 47.3301553 1.0000000 Vinyl chlorides 1 2 331 1628
0.3874748 2.1132214 0.3508188 9.3176960 1.0000000 Polymer unclassified 3 7 329 1623
0.3373835 1.9689582 0.1867245 12.0896167 1.0000000 Antifungal 2 5 330 1625
0.3373835 1.9689582 0.1867245 12.0896167 1.0000000 Insecticide Organochlorine Sodium channel modulator 2 5 330 1625
0.2760943 1.6477678 0.5312792 4.3752910 1.0000000 Fatty alcohols 6 18 326 1612
0.3309791 1.6438180 0.3840076 5.4677325 1.0000000 Insecticide Pyrethroid Sodium channel modulator 4 12 328 1618
0.6297011 1.6399161 0.1611829 9.2229863 1.0000000 Herbicide Dinitroaniline Microtubule assembly inhibitor 2 6 330 1624
0.6297011 1.6399161 0.1611829 9.2229863 1.0000000 Organic cyanides 2 6 330 1624
0.6297011 1.6399161 0.1611829 9.2229863 1.0000000 Organometallic Sn 2 6 330 1624
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 1,3,5-triazines 1 3 331 1627
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 Adrenergic beta agonist 1 3 331 1627
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 Biphenyls and derivatives 1 3 331 1627
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 Corticosteroid 1 3 331 1627
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 FASA based PFAA precursors 1 3 331 1627
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 Halopyridines 1 3 331 1627
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 Herbicide N-Phenylimide Protoporphyrinogen Oxidase inhibitor 1 3 331 1627
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 Phenylmethylamines 1 3 331 1627
0.5239161 1.6379828 0.0311175 20.4703761 1.0000000 Triazoles 1 3 331 1627
0.2645102 1.5937665 0.6551882 3.5151271 1.0000000 Unclassed 9 28 323 1602
0.6539818 1.4049214 0.1417719 7.4226195 1.0000000 Herbicide Aryloxyphenoxypropionic Acetyl CoA carboxylase inhibitor 2 7 330 1623
0.2694406 1.3720820 0.7263732 2.4575543 1.0000000 Other aliphatics 16 58 316 1572
1.0000000 1.2279349 0.0248630 12.4639272 1.0000000 Pyrrolidones 1 4 331 1626
0.8113715 1.1354020 0.3790991 2.8499821 1.0000000 Monoterpenoids 6 26 326 1604
0.8153542 1.0926772 0.3659431 2.7300996 1.0000000 PFAAs 6 27 326 1603
1.0000000 1.0525211 0.1928642 3.8009334 1.0000000 Nitrobenzenes 3 14 329 1616
1.0000000 0.9818806 0.0206977 8.8166350 1.0000000 Androgen 1 5 331 1625
1.0000000 0.9818806 0.0206977 8.8166350 1.0000000 COX inhibitor 1 5 331 1625
1.0000000 0.9818806 0.0206977 8.8166350 1.0000000 Diphenylethers 1 5 331 1625
1.0000000 0.9818806 0.0206977 8.8166350 1.0000000 Insecticide Organophosphate AChE Inhibitor 1 5 331 1625
1.0000000 0.9818806 0.0206977 8.8166350 1.0000000 Isoindolines 1 5 331 1625
1.0000000 0.9818806 0.0206977 8.8166350 1.0000000 Methoxyphenols 1 5 331 1625
1.0000000 0.9343892 0.2316724 2.7949409 1.0000000 Pharmaceutical unknown MOA 4 21 328 1609
1.0000000 0.8920718 0.0956496 4.1153437 1.0000000 Naphthalenes 2 11 330 1619
1.0000000 0.8748936 0.2617981 2.3182786 1.0000000 Phenylpropanes 5 28 327 1602
1.0000000 0.8442117 0.2532558 2.2282595 1.0000000 Fatty acid esters 5 29 327 1601
0.8347532 0.8388096 0.2860208 2.0376895 1.0000000 Carbonyl compounds 6 35 326 1595
1.0000000 0.8178031 0.0177246 6.7746515 1.0000000 Estrogen flavonoid 1 6 331 1624
1.0000000 0.8178031 0.0177246 6.7746515 1.0000000 PFAA precursors 1 6 331 1624
1.0000000 0.8178031 0.0177246 6.7746515 1.0000000 Vitamin B 1 6 331 1624
1.0000000 0.7539463 0.0822256 3.3542090 1.0000000 Dicarboxylic acids and derivatives 2 13 330 1617
0.7873198 0.6666056 0.1270060 2.2381044 1.0000000 Benzene and substituted derivatives 3 22 329 1608
1.0000000 0.5442831 0.0123810 3.9508824 1.0000000 Naphthalene sulfonic acids and derivatives 1 9 331 1621
0.2922433 0.5251940 0.1350515 1.4765759 1.0000000 Benzoic acids and derivatives 4 37 328 1593
0.7022061 0.4895696 0.0112475 3.4621846 1.0000000 Cresols 1 10 331 1620
0.7030913 0.4447943 0.0103032 3.0791593 1.0000000 Imidazolidines 1 11 331 1619
0.7084897 0.4074861 0.0095042 2.7715913 1.0000000 Diphenylmethanes 1 12 331 1618
0.7084897 0.4074861 0.0095042 2.7715913 1.0000000 Insecticide Carbamate AChE Inhibitor 1 12 331 1618
0.4871522 0.3759164 0.0088196 2.5190497 1.0000000 Amino acids, peptides, and analogues 1 13 331 1617
0.4871522 0.3759164 0.0088196 2.5190497 1.0000000 Phosphate esters 1 13 331 1617
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Alkaloid 0 3 332 1627
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Alkanes 0 4 332 1626
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Alkyl chlorides 0 3 332 1627
0.5975704 0.0000000 0.0000000 4.1744226 1.0000000 Alpha-halocarboxylic acids and derivatives 0 6 332 1624
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Androgen nonsteroidal antagonist 0 4 332 1626
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Anesthetic 0 3 332 1627
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Anilides 0 3 332 1627
0.5975704 0.0000000 0.0000000 4.1744226 1.0000000 Anisoles 0 6 332 1624
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Anthraquinones 0 3 332 1627
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Antibiotic 0 4 332 1626
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Aromatase inhibitor 0 4 332 1626
0.6099492 0.0000000 0.0000000 3.4090189 1.0000000 Aromatic oil 0 7 332 1623
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Benzenediols 0 3 332 1627
0.3655439 0.0000000 0.0000000 2.8773076 1.0000000 Benzenesulfonic acids and derivatives 0 8 332 1622
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Benzoic acid 0 3 332 1627
0.6099492 0.0000000 0.0000000 3.4090189 1.0000000 Benzophenones 0 7 332 1623
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Benzyloxycarbonyls 0 4 332 1626
0.6099492 0.0000000 0.0000000 3.4090189 1.0000000 Cannabinoid receptor antagonist 0 7 332 1623
0.5975704 0.0000000 0.0000000 4.1744226 1.0000000 Carbohydrates and carbohydrate conjugates 0 6 332 1624
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Chlorohydrins 0 3 332 1627
0.6099492 0.0000000 0.0000000 3.4090189 1.0000000 Coumarin 0 7 332 1623
0.5975704 0.0000000 0.0000000 4.1744226 1.0000000 Cumenes 0 6 332 1624
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 DNA synthesis inhibitor 0 3 332 1627
0.6099492 0.0000000 0.0000000 3.4090189 1.0000000 Estrogen 0 7 332 1623
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Fatty alcohol esters 0 3 332 1627
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Fibrate 0 3 332 1627
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Fungicide Benzimidazole Cytoskeleton and motor protein inhibitor 0 3 332 1627
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Fungicide Dicarboximide Signal Transduction 0 3 332 1627
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Fungicide Phenylbenzamide Respiration 0 3 332 1627
0.3655439 0.0000000 0.0000000 2.8773076 1.0000000 Gamma butyrolactones 0 8 332 1622
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Glycogen phosphorylase inhibitor 0 3 332 1627
0.3717095 0.0000000 0.0000000 2.4872779 1.0000000 Halophenols 0 9 332 1621
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Herbicide Cyclohexanedione oxime Acetyl CoA carboxylase inhibitor 0 3 332 1627
0.5963742 0.0000000 0.0000000 5.3649519 1.0000000 Herbicide Diphenyl ether Protoporphyrinogen Oxidase inhibitor 0 5 332 1625
0.5975704 0.0000000 0.0000000 4.1744226 1.0000000 Herbicide Imidazolinone Acetolactate synthase inhibitor 0 6 332 1624
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Herbicide Phenoxyacetic Auxin mimic 0 4 332 1626
0.5963742 0.0000000 0.0000000 5.3649519 1.0000000 Herbicide Phenylurea Inhbition of photosynthesis at PSll 0 5 332 1625
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Herbicide Pyridyloxycarboxylic acid Auxin mimic 0 4 332 1626
0.3655439 0.0000000 0.0000000 2.8773076 1.0000000 Herbicide Thiocarbamate Very Long-Chain Fatty Acid Synthesis inhibitor 0 8 332 1622
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Herbicide Triazolopyrimidine Acetolactate synthase inhibitor 0 3 332 1627
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Insecticide Benzoylurea Chitin biosynthesis inhibitor 0 3 332 1627
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Insecticide Nitroguanidine NACHR Competative Modulator 0 4 332 1626
0.5963742 0.0000000 0.0000000 5.3649519 1.0000000 Insecticide Phosphoramidate AChE Inhibitor 0 5 332 1625
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Kinase inhibitor 0 4 332 1626
1.0000000 0.0000000 0.0000000 26.1790929 1.0000000 Methoxybenzenes 0 2 332 1628
0.5963742 0.0000000 0.0000000 5.3649519 1.0000000 Morpholines 0 5 332 1625
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Neurokinin receptor antagonist 0 4 332 1626
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Nitrophenols 0 3 332 1627
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Organic carbonic acids and derivatives 0 3 332 1627
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Organic metalloid salts 0 4 332 1626
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Organic nitro compounds 0 3 332 1627
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Organic nitroso compounds 0 4 332 1626
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Organobromides 0 3 332 1627
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Organonitrogen compounds 0 4 332 1626
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Organosulfonic acids and derivatives 0 3 332 1627
0.6099492 0.0000000 0.0000000 3.4090189 1.0000000 Phenol ethers 0 7 332 1623
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Phenoxy compounds 0 3 332 1627
0.6099492 0.0000000 0.0000000 3.4090189 1.0000000 Phenoxyacetic acid derivatives 0 7 332 1623
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Phenylhydrazines 0 3 332 1627
0.5963742 0.0000000 0.0000000 5.3649519 1.0000000 Phosphodiesterase inhibitor 0 5 332 1625
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Phosphonic acid diesters 0 3 332 1627
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Pyranones and derivatives 0 4 332 1626
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Pyrazoles 0 4 332 1626
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Pyridinecarboxylic acids and derivatives 0 4 332 1626
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Serotonin receptor antagonist 0 4 332 1626
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Side-chain aromatics 0 4 332 1626
0.5975704 0.0000000 0.0000000 4.1744226 1.0000000 Silicon PFASs 0 6 332 1624
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Sodium channel inhibitor 0 3 332 1627
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Tetracarboxylic acids and derivatives 0 4 332 1626
1.0000000 0.0000000 0.0000000 7.4482099 1.0000000 Thioureas 0 4 332 1626
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Thyroid hormone 0 3 332 1627
0.5975704 0.0000000 0.0000000 4.1744226 1.0000000 Triazinones 0 6 332 1624
1.0000000 0.0000000 0.0000000 11.8982067 1.0000000 Xylenes 0 3 332 1627

IYD

For IYD
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0000061 28.4636401 5.3730297 280.6420376 0.0010623 Insecticide Chlorinated cyclodiene GABA-gated chloride channel blocker 7 2 214 1746
0.0000234 5.3920218 2.4265620 11.5894098 0.0020485 PFAAs 13 20 208 1728
0.0001176 16.1859485 3.4275574 100.8398210 0.0068621 Quaternary ammonium salts 6 3 215 1745
0.0005319 9.7038914 2.4453693 40.5850137 0.0232707 Bisphenol 6 5 215 1743
0.0009648 8.0855389 2.1411398 30.5397578 0.0337694 1-hydroxy-2-unsubstituted benzenoids 6 6 215 1742
0.0025810 6.0609404 1.7157668 20.1290472 0.0752799 Sulfuric acid esters 6 8 215 1740
0.0075059 8.0204083 1.4828070 43.4152724 0.1641923 Azobenzenes 4 4 217 1744
0.0075059 8.0204083 1.4828070 43.4152724 0.1641923 Sesquiterpenoids 4 4 217 1744
0.0102784 5.0277195 1.2822072 17.6105803 0.1998587 Polymer unclassified 5 8 216 1740
0.0187767 5.3438713 1.1003303 22.7362664 0.3285916 Estrogen nonsteroidal 4 6 217 1742
0.0348474 15.9091055 0.8250675 935.4537166 0.5081920 Cinnamaldehydes 2 1 219 1747
0.0346557 5.9902639 0.8717473 35.6454134 0.5081920 Insecticide Organochlorine Sodium channel modulator 3 4 218 1744
0.0417969 0.0000000 0.0000000 1.0265154 0.5567561 Alcohols and polyols 0 30 221 1718
0.0445405 0.0000000 0.0000000 0.9263046 0.5567561 Monoterpenoids 0 33 221 1715
0.0509455 4.7909087 0.7389016 24.8072011 0.5943643 Naphthalene sulfonic acids and derivatives 3 5 218 1743
0.0645718 7.9564482 0.5740981 110.3150852 0.6364787 Glyceryl 2 2 219 1746
0.0596881 2.2251187 0.8675092 5.0701325 0.6364787 Unclassed 8 29 213 1719
0.0654664 0.0000000 0.0000000 1.1059679 0.6364787 Carboxylic acid derivatives 0 28 221 1720
0.0707826 2.4516147 0.8777795 5.9917977 0.6519450 Phenylpropanes 7 23 214 1725
0.0997871 5.3042310 0.4407893 46.5445100 0.8392775 Phenylmethylamines 2 3 219 1745
0.1007133 0.0000000 0.0000000 1.3077434 0.8392775 Fatty alcohols 0 24 221 1724
0.1388979 3.9767803 0.3578251 27.9474267 1.0000000 Anilides 2 4 219 1744
0.1388979 3.9767803 0.3578251 27.9474267 1.0000000 Naphthols and derivatives 2 4 219 1744
0.3004747 3.9632725 0.0669590 76.3259080 1.0000000 Aryl chlorides 1 2 220 1746
0.3004747 3.9632725 0.0669590 76.3259080 1.0000000 Benzenediols 1 2 220 1746
0.3004747 3.9632725 0.0669590 76.3259080 1.0000000 Fungicide Anilinopyrimidine Amino Acid and Protein Synthesis 1 2 220 1746
0.3004747 3.9632725 0.0669590 76.3259080 1.0000000 Herbicide Triazolopyrimidine Acetolactate synthase inhibitor 1 2 220 1746
0.3004747 3.9632725 0.0669590 76.3259080 1.0000000 Lineolic acids and derivatives 1 2 220 1746
0.3004747 3.9632725 0.0669590 76.3259080 1.0000000 PPARg agonist 1 2 220 1746
0.3004747 3.9632725 0.0669590 76.3259080 1.0000000 Tryptase inhibitor 1 2 220 1746
0.1805948 3.1805974 0.3011743 19.5686809 1.0000000 Antifungal 2 5 219 1743
0.3791091 2.6420791 0.0501378 33.0632249 1.0000000 Antibiotic polyketide 1 3 220 1745
0.3791091 2.6420791 0.0501378 33.0632249 1.0000000 Benzofuranones 1 3 220 1745
0.3791091 2.6420791 0.0501378 33.0632249 1.0000000 FASA based PFAA precursors 1 3 220 1745
0.3791091 2.6420791 0.0501378 33.0632249 1.0000000 Herbicide Pyridyloxycarboxylic acid Auxin mimic 1 3 220 1745
0.3791091 2.6420791 0.0501378 33.0632249 1.0000000 Side-chain aromatics 1 3 220 1745
0.3791091 2.6420791 0.0501378 33.0632249 1.0000000 Thyroid hormone 1 3 220 1745
0.3791091 2.6420791 0.0501378 33.0632249 1.0000000 Triazoles 1 3 220 1745
0.1718190 2.3902627 0.4195380 9.3778417 1.0000000 Naphthalenes 3 10 218 1738
0.4489395 1.9809820 0.0400620 20.1342836 1.0000000 Antiviral 1 4 220 1744
0.4489395 1.9809820 0.0400620 20.1342836 1.0000000 Morpholines 1 4 220 1744
0.4489395 1.9809820 0.0400620 20.1342836 1.0000000 Pyrrolidones 1 4 220 1744
0.2840047 1.8760945 0.4551300 5.8234968 1.0000000 Fatty acids and conjugates 4 17 217 1731
0.4118864 1.8358975 0.3329889 6.7545411 1.0000000 Insecticide Pyrethroid Sodium channel modulator 3 13 218 1735
0.2139879 1.6484734 0.6090564 3.8393161 1.0000000 Benzoic acids and derivatives 7 34 214 1714
0.5109479 1.5840927 0.0333518 14.2494246 1.0000000 Antifungal paraben 1 5 220 1743
0.5109479 1.5840927 0.0333518 14.2494246 1.0000000 Biphenyls and derivatives 1 5 220 1743
0.5109479 1.5840927 0.0333518 14.2494246 1.0000000 COX inhibitor 1 5 220 1743
0.5109479 1.5840927 0.0333518 14.2494246 1.0000000 Diphenylethers 1 5 220 1743
0.5109479 1.5840927 0.0333518 14.2494246 1.0000000 Isoindolines 1 5 220 1743
0.6496959 1.4417776 0.1542835 6.6704393 1.0000000 Diphenylmethanes 2 11 219 1737
0.6651805 1.3209460 0.1427034 5.9927675 1.0000000 Phosphate esters 2 12 219 1736
0.5660067 1.3194872 0.0285623 10.9533896 1.0000000 Estrogen flavonoid 1 6 220 1742
0.5660067 1.3194872 0.0285623 10.9533896 1.0000000 PFAA precursors 1 6 220 1742
0.5660067 1.3194872 0.0285623 10.9533896 1.0000000 Vitamin B 1 6 220 1742
0.7298503 1.2521370 0.2354111 4.3049489 1.0000000 Fungicide Triazole Sterol Biosynthesis in Membranes 3 19 218 1729
0.7374023 1.1888780 0.2244168 4.0573834 1.0000000 Fluorotelomer PFAA precursors 3 20 218 1728
0.6986439 1.1310206 0.1239391 4.9755315 1.0000000 Amino acids, peptides, and analogues 2 14 219 1734
0.6986439 1.1310206 0.1239391 4.9755315 1.0000000 Nitrobenzenes 2 14 219 1734
1.0000000 1.1304257 0.0249720 8.8658943 1.0000000 Benzenesulfonic acids and derivatives 1 7 220 1741
1.0000000 1.1304257 0.0249720 8.8658943 1.0000000 Halophenols 1 7 220 1741
1.0000000 1.1304257 0.0249720 8.8658943 1.0000000 Herbicide Dinitroaniline Microtubule assembly inhibitor 1 7 220 1741
0.4577359 0.6895810 0.2415890 1.6068370 1.0000000 Other aliphatics 6 68 215 1680
1.0000000 0.6850458 0.0777646 2.8039401 1.0000000 Pharmaceutical unknown MOA 2 23 219 1725
0.6150608 0.6364988 0.1245129 2.0368436 1.0000000 Carbonyl compounds 3 37 218 1711
0.5744311 0.5059543 0.0582790 2.0121565 1.0000000 Fatty acid esters 2 31 219 1717
0.7114512 0.4921811 0.0116842 3.1986101 1.0000000 Organosilicon compounds 1 16 220 1732
0.7140528 0.4370024 0.0104417 2.7948065 1.0000000 Toluenes 1 18 220 1730
0.5044381 0.3567378 0.0086054 2.2292737 1.0000000 Phthalate 1 22 220 1726
0.5093499 0.3410331 0.0082421 2.1215249 1.0000000 Benzene and substituted derivatives 1 23 220 1725
0.3506156 0.3266368 0.0079078 2.0234526 1.0000000 Ethers 1 24 220 1724
0.3584997 0.2898452 0.0070483 1.7768471 1.0000000 Amines 1 27 220 1721
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 1,3,5-triazines 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Adrenergic beta agonist 0 4 221 1744
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Alkaloid 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Alkanes 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Alkyl chlorides 0 3 221 1745
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Alpha-halocarboxylic acids and derivatives 0 7 221 1741
1.0000000 0.0000000 0.0000000 6.7380194 1.0000000 Androgen 0 6 221 1742
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Androgen antagonist 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Androgen nonsteroidal antagonist 0 4 221 1744
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Anesthetic 0 3 221 1745
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Anisoles 0 5 221 1743
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Anthracenes 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Anthraquinones 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Antibiotic 0 4 221 1744
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Aromatase inhibitor 0 4 221 1744
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Aromatic oil 0 7 221 1741
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Benzenesulfonyl compounds 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Benzoic acid 0 3 221 1745
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Benzophenones 0 7 221 1741
1.0000000 0.0000000 0.0000000 6.7380194 1.0000000 Benzothiazoles 0 6 221 1742
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Benzyloxycarbonyls 0 4 221 1744
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Cannabinoid receptor antagonist 0 7 221 1741
1.0000000 0.0000000 0.0000000 6.7380194 1.0000000 Carbohydrates and carbohydrate conjugates 0 6 221 1742
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Chlorohydrins 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Cinnamic acids and derivatives 0 4 221 1744
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Corticosteroid 0 4 221 1744
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Coumarin 0 7 221 1741
0.6237743 0.0000000 0.0000000 3.1560268 1.0000000 Cresols 0 11 221 1737
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Cumenes 0 5 221 1743
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Depsides and depsidones 0 3 221 1745
0.3990497 0.0000000 0.0000000 2.2046417 1.0000000 Dicarboxylic acids and derivatives 0 15 221 1733
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 DNA synthesis inhibitor 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Dopamine receptor agonist 0 3 221 1745
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Estrogen 0 7 221 1741
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Fatty alcohol esters 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Fibrate 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Fungicide Benzimidazole Cytoskeleton and motor protein inhibitor 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Fungicide Dicarboximide Signal Transduction 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Fungicide Imidazole Sterol Biosynthesis in Membranes 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Fungicide Phenylbenzamide Respiration 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Fungicide Phthalimide Multi-site activity 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Fungicide Polychloroaromatic Lipid Synthesis or Transport / Membrane Integrity or Function 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Fungicide Thiocarbamate Multi-site activity 0 4 221 1744
0.6089194 0.0000000 0.0000000 4.6456988 1.0000000 Gamma butyrolactones 0 8 221 1740
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Glycogen phosphorylase inhibitor 0 3 221 1745
0.2506343 0.0000000 0.0000000 1.7959844 1.0000000 Halobenzenes 0 18 221 1730
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Halopyridines 0 4 221 1744
0.6093931 0.0000000 0.0000000 4.0164868 1.0000000 Herbicide Aryloxyphenoxypropionic Acetyl CoA carboxylase inhibitor 0 9 221 1739
0.6089194 0.0000000 0.0000000 4.6456988 1.0000000 Herbicide Chloroacetamide Very Long-Chain Fatty Acid Synthesis inhibitor 0 8 221 1740
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Herbicide Cyclohexanedione oxime Acetyl CoA carboxylase inhibitor 0 3 221 1745
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Herbicide Diphenyl ether Protoporphyrinogen Oxidase inhibitor 0 5 221 1743
1.0000000 0.0000000 0.0000000 6.7380194 1.0000000 Herbicide Imidazolinone Acetolactate synthase inhibitor 0 6 221 1742
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Herbicide N-Phenylimide Protoporphyrinogen Oxidase inhibitor 0 4 221 1744
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Herbicide Phenoxyacetic Auxin mimic 0 4 221 1744
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Herbicide Phenylurea Inhbition of photosynthesis at PSll 0 5 221 1743
0.6089194 0.0000000 0.0000000 4.6456988 1.0000000 Herbicide Thiocarbamate Very Long-Chain Fatty Acid Synthesis inhibitor 0 8 221 1740
0.6089194 0.0000000 0.0000000 4.6456988 1.0000000 Herbicide Triazine Inhibition of photosynthesis at PSll 0 8 221 1740
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 HMGCoA inhibitor 0 4 221 1744
0.3820522 0.0000000 0.0000000 2.8497280 1.0000000 Imidazolidines 0 12 221 1736
0.6237743 0.0000000 0.0000000 3.1560268 1.0000000 Insecticide Aliphatic organothiophosphate AChE Inhibitor 0 11 221 1737
0.3990497 0.0000000 0.0000000 2.2046417 1.0000000 Insecticide Aryl organothiophosphate AChE Inhibitor 0 15 221 1733
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Insecticide Arylalkyl organothiophosphate AChE Inhibitor 0 5 221 1743
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Insecticide Benzoylurea Chitin biosynthesis inhibitor 0 3 221 1745
0.3834890 0.0000000 0.0000000 2.5969826 1.0000000 Insecticide Carbamate AChE Inhibitor 0 13 221 1735
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Insecticide Diacylhydrazine Ecdysone receptor agonist 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Insecticide Nitroguanidine NACHR Competative Modulator 0 4 221 1744
1.0000000 0.0000000 0.0000000 6.7380194 1.0000000 Insecticide Organophosphate AChE Inhibitor 0 6 221 1742
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Insecticide Phosphoramidate AChE Inhibitor 0 5 221 1743
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Kinase inhibitor 0 4 221 1744
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Methoxybenzenes 0 3 221 1745
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Methoxyphenols 0 5 221 1743
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Neurokinin receptor antagonist 0 4 221 1744
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Nitrophenols 0 4 221 1744
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Organic carbonic acids and derivatives 0 3 221 1745
0.6089194 0.0000000 0.0000000 4.6456988 1.0000000 Organic cyanides 0 8 221 1740
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Organic metalloid salts 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Organic nitro compounds 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Organic nitroso compounds 0 4 221 1744
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Organobromides 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Organometallic Hg 0 3 221 1745
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Organometallic Sn 0 7 221 1741
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Organonitrogen compounds 0 4 221 1744
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Organosulfonic acids and derivatives 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Phenanthrenes and derivatives 0 3 221 1745
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Phenol ethers 0 7 221 1741
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Phenoxy compounds 0 3 221 1745
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Phenoxyacetic acid derivatives 0 7 221 1741
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Phenylhydrazines 0 3 221 1745
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Phosphodiesterase inhibitor 0 5 221 1743
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Phosphonic acid diesters 0 3 221 1745
1.0000000 0.0000000 0.0000000 5.5034107 1.0000000 Progestogen 0 7 221 1741
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Pyranones and derivatives 0 4 221 1744
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Pyrazoles 0 4 221 1744
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Pyridinecarboxylic acids and derivatives 0 4 221 1744
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Retinoid 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Serotonin receptor antagonist 0 4 221 1744
1.0000000 0.0000000 0.0000000 6.7380194 1.0000000 Silicon PFASs 0 6 221 1742
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Sodium channel inhibitor 0 3 221 1745
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Tetracarboxylic acids and derivatives 0 4 221 1744
1.0000000 0.0000000 0.0000000 12.0179052 1.0000000 Thioureas 0 4 221 1744
1.0000000 0.0000000 0.0000000 8.6580069 1.0000000 Triazinones 0 5 221 1743
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Vinyl chlorides 0 3 221 1745
1.0000000 0.0000000 0.0000000 19.1923382 1.0000000 Xylenes 0 3 221 1745

Table 5 of Truong et al. 

# Build Table 5: for each assay keep the classes that are significant after
# multiple-testing adjustment (padj < 0.05) and strongly enriched or depleted
# (odds ratio > 3 or < 1/3), with only the columns reported in the paper.
sig.classes <- lapply(res, function(df) {
  df %>%
    filter(padj < 0.05 & (Odds_Ratio > 3 | Odds_Ratio < 1/3)) %>%
    select(Pval, padj, Odds_Ratio, Lower, Upper, group,
           `Actives in group`, `Inactives in group`)
})

names(sig.classes) <- aenm.abbrevs

# Prepend an `assay` column to each per-assay table. rep() keeps this valid
# for an assay with no significant classes: assigning a length-1 value to a
# column of a 0-row plain data.frame is an error.
add.assay <- Map(function(df, name) {
  df$assay <- rep(name, nrow(df))
  df[, c("assay", colnames(df)[colnames(df) != "assay"])]
}, sig.classes, names(sig.classes))

tb.paper <- bind_rows(add.assay)

# report pvalues with 2 sigfigs
tb.paper[, c("Pval", "padj")] <- signif(tb.paper[, c("Pval", "padj")],
                                        digits = 2)

# round other numerical values to 2 digits after decimal pt
tb.paper[, c("Odds_Ratio", "Lower", "Upper")] <-
  round(tb.paper[, c("Odds_Ratio", "Lower", "Upper")], digits = 2)

kable(tb.paper, caption = "Summary of Significant Results across all Targets")
Summary of Significant Results across all Targets
assay Pval padj Odds_Ratio Lower Upper group Actives in group Inactives in group
DIO1 2.3e-05 0.0034 42.71 5.15 1950.44 Estrogen flavonoid 6 1
DIO1 3.9e-05 0.0034 16.68 3.78 100.72 Quaternary ammonium salts 7 3
DIO1 8.3e-05 0.0048 21.36 3.79 216.95 Herbicide Chloroacetamide Very Long-Chain Fatty Acid Synthesis inhibitor 6 2
DIO1 5.0e-04 0.0220 10.67 2.51 51.83 Estrogen nonsteroidal 6 4
DIO1 1.1e-03 0.0320 28.24 2.78 1384.87 Antibiotic polyketide 4 1
DIO1 1.1e-03 0.0320 28.24 2.78 1384.87 Insecticide Arylalkyl organothiophosphate AChE Inhibitor 4 1
DIO1 2.0e-03 0.0430 Inf 2.90 Inf Aryl chlorides 3 0
DIO1 2.0e-03 0.0430 Inf 2.90 Inf Fungicide Phthalimide Multi-site activity 3 0
DIO2 2.4e-05 0.0021 35.54 4.54 1591.59 Herbicide Chloroacetamide Very Long-Chain Fatty Acid Synthesis inhibitor 7 1
DIO2 1.9e-05 0.0021 20.35 4.04 197.16 Estrogen nonsteroidal 8 2
DIO2 2.6e-04 0.0150 11.83 2.68 71.28 Quaternary ammonium salts 7 3
DIO2 6.2e-04 0.0270 8.87 2.24 41.57 Bisphenol 7 4
DIO3 2.6e-05 0.0023 35.02 4.47 1568.38 Herbicide Chloroacetamide Very Long-Chain Fatty Acid Synthesis inhibitor 7 1
DIO3 2.6e-05 0.0023 35.02 4.47 1568.38 Herbicide Triazine Inhibition of photosynthesis at PSll 7 1
DIO3 2.8e-04 0.0160 11.66 2.64 70.19 Quaternary ammonium salts 7 3
DIO3 6.6e-04 0.0290 8.74 2.21 40.99 Bisphenol 7 4
IYD 6.1e-06 0.0011 28.46 5.37 280.64 Insecticide Chlorinated cyclodiene GABA-gated chloride channel blocker 7 2
IYD 2.3e-05 0.0020 5.39 2.43 11.59 PFAAs 13 20
IYD 1.2e-04 0.0069 16.19 3.43 100.84 Quaternary ammonium salts 6 3
IYD 5.3e-04 0.0230 9.70 2.45 40.59 Bisphenol 6 5
IYD 9.6e-04 0.0340 8.09 2.14 30.54 1-hydroxy-2-unsubstituted benzenoids 6 6

Supplemental File 2

# prep Excel output with tabs: rename the per-assay result tables and append
# the class annotation table as an extra list element
names(res) <- paste0(aenm.abbrevs, "-class")
res[["class annotations"]] <- classified_dt

Hits by Target

# Significant classes grouped by odds ratio. The strings must match the class
# labels in the data exactly, so they are kept verbatim.

# classes whose odds ratio is infinite
inf.or <- c(
  "Aryl chlorides",
  "Fungicide Phthalimide Multi-site activity"
)

# classes with a finite odds ratio
sig.or <- c(
  "Quaternary ammonium salts",
  "Herbicide Chloroacetamide Very Long-Chain Fatty Acid Synthesis inhibitor",
  "Estrogen nonsteroidal",
  "Bisphenol",
  "Estrogen flavonoid",
  "Antibiotic polyketide",
  "Insecticide Arylalkyl organothiophosphate AChE Inhibitor",
  "Herbicide Triazine Inhibition of photosynthesis at PSll",
  "Insecticide Chlorinated cyclodiene GABA-gated chloride channel blocker",
  "PFAAs",
  "1-hydroxy-2-unsubstituted benzenoids"
)
  
#' Draw the four-target Venn diagram of hits for one class
#'
#' Only chemicals with a hit call for all four targets (DIO1, DIO2, DIO3, IYD)
#' are counted. Relies on the file-level objects `classified_dt` (chemical
#' names) and `h.deiod.aenm` (assay endpoint names, named by abbreviation).
#'
#' @param class The class to plot; a value of the `class_var` column.
#' @param hitc.class.dat A data.frame (or data.table) with columns
#'   `dsstox_substance_id`, `aenm`, `hitc`, and the classification column
#'   named by `class_var`.
#' @param class_var Name (character) of the classification column.
#' @return The `ggvenn()` plot for the class, annotated below the diagram with
#'   the number of chemicals in the class that hit none of the four targets
#'   (left blank when there are none).
display_venn <- function(class, hitc.class.dat, class_var) {
  # one row per chemical (and class), one hit-call column per assay endpoint
  wide_formula <- as.formula(
    paste("dsstox_substance_id +", class_var, "~ aenm")
  )
  merged_datw <- dcast(hitc.class.dat, wide_formula, value.var = 'hitc')
  setDT(merged_datw)
  merged_datw[, name := classified_dt$name[
    match(dsstox_substance_id, classified_dt$dsstox_substance_id)
  ]]

  # Rename assay columns by name rather than by position, so a missing or
  # reordered assay column cannot be silently mislabeled.
  missing_aenm <- setdiff(h.deiod.aenm, colnames(merged_datw))
  if (length(missing_aenm) > 0) {
    stop("No hit calls found for assay endpoint(s): ",
         paste(missing_aenm, collapse = ", "), call. = FALSE)
  }
  setnames(merged_datw, old = unname(h.deiod.aenm), new = names(h.deiod.aenm))

  merged_datw[, total_hits := DIO1 + DIO2 + DIO3 + IYD]

  # keep chemicals with no missing value in any column (tested in all four)
  merged_datw.tested4x <- merged_datw[complete.cases(merged_datw)]
  class_dat <- merged_datw.tested4x[get(class_var) == class]

  list_venn <- list(DIO1 = class_dat[DIO1 == 1, name],
                    DIO2 = class_dat[DIO2 == 1, name],
                    DIO3 = class_dat[DIO3 == 1, name],
                    IYD = class_dat[IYD == 1, name])

  # chemicals in the class with zero total hits fall outside every set
  n_no_hits <- sum(class_dat$total_hits == 0)
  no_hits_label <- if (n_no_hits > 0) n_no_hits else ""

  ggvenn(list_venn,
         show_percentage = FALSE,
         fill_color = c("#0073C2FF", "#EFC000FF", "#868686FF", "#CD534CFF"),
         text_size = 6) +
    annotate("text", x = 2, y = -0.5, label = no_hits_label, size = 6)
}

Odds Ratio == Inf

# Attach chemical names and class annotations to the final hit calls.
# NOTE(review): `merged_dat.sel` is built here, but the Venn diagrams below
# are drawn from `merged_dat`, which must be defined elsewhere in the file.
# Confirm that `merged_dat` is the intended input, and whether
# `merged_dat.sel` was meant to use the selectivity-filtered hit calls
# (`long_hitc_final2`) rather than `long_hitc_final`.
merged_dat.sel <- merge.data.table(
  long_hitc_final,
  classified_dt[, .(dsstox_substance_id, name, class_type, chosen_class)],
  by = "dsstox_substance_id",
  all.x = TRUE
)

# one Venn diagram per class with an infinite odds ratio
infor_list <- lapply(inf.or, display_venn,
                     hitc.class.dat = merged_dat, class_var = "chosen_class")
names(infor_list) <- inf.or

# combine the plots into one figure
ggarrange(plotlist = infor_list,
          labels = names(infor_list),
          font.label = list(size = 18),
          hjust = -0.25,
          label.y = 0.8,
          nrow = 1, ncol = 2)

Odds Ratio > 3

# one Venn diagram per class with a finite, significant odds ratio,
# keyed by class name
sigor_list <- setNames(
  lapply(sig.or, function(cls) {
    display_venn(cls, hitc.class.dat = merged_dat, class_var = "chosen_class")
  }),
  sig.or
)

sig.class.labels <- names(sigor_list)

# combine the plots into one figure (6 x 2 grid); label.x holds one manual
# horizontal label offset per panel
ggarrange(
  plotlist = sigor_list,
  nrow = 6, ncol = 2,
  labels = sig.class.labels,
  label.x = c(0, -0.20, 0.05, 0.10, 0.05, 0, -0.10, -0.08, -0.15, 0.15, -0.05),
  label.y = 0.97,
  hjust = -0.25
)

Figure 4 of Truong et al. 

# The paper figure shows two classes: estrogen flavonoids and triazine
# herbicides. Each Venn diagram gets a bold caption underneath.
flavos.venn <- sigor_list[["Estrogen flavonoid"]] +
  annotate("text", x = 0, y = -2, size = 6, fontface = "bold",
           label = "Estrogen flavonoids")

triazine.venn <-
  sigor_list[["Herbicide Triazine Inhibition of photosynthesis at PSll"]] +
  annotate("text", x = 0, y = -2, size = 6, fontface = "bold",
           label = "Herbicide Triazines Inhibition of photosynthesis at PSll")

# two-panel figure (A, B)
uniq.positives <- ggarrange(
  flavos.venn, triazine.venn,
  nrow = 1, ncol = 2,
  labels = c("A", "B"),
  font.label = list(size = 25),
  label.x = 0,
  label.y = 0.8,
  hjust = 0
)

uniq.positives

# write the same 15 x 12 in, 300 dpi figure as TIFF and as PNG
invisible(Map(
  function(device, filename) {
    ggsave(plot = uniq.positives,
           units = "in",
           dpi = 300,
           width = 15, height = 12,
           device = device,
           filename = filename)
  },
  c("tiff", "png"),
  c("./figures/300dpi/uniq_positives.tiff", "./figures/uniq_positives.png")
))

Color/Spectral Interference

We wish to examine whether there is enriched activity (or inactivity) for specific groups of colored substances, grouped either by the specific color or by registration type (e.g., C.I., FD&C, D&C).

Colors vs. non-colors

Color breakdown in the DIO/IYD testing library.

# Color words searched for in chemical names (matched case-insensitively).
all_colors <- c("Red", "Orange", "Yellow", "Green", "Blue", "Violet", "Black")

# Regex for a name in which one of `colors` follows another word and a
# whitespace character. Single source for both color regexes below.
# NOTE(review): there is no word boundary after the color word, so names where
# the color is only a prefix of a longer word would also match. Confirm this
# is acceptable for the names in the library.
color_pattern <- function(colors) {
  paste0("\\w+\\s(", paste(colors, collapse = "|"), ")\\s*\\w*")
}

# main_class: "Colors" for any colored substance, otherwise the chosen class
is_color <- grepl(color_pattern(tolower(all_colors)), classified_dt$name,
                  ignore.case = TRUE)
classified_dt[!is_color, main_class := chosen_class]
classified_dt[is_color, main_class := "Colors"]

# new_class: the specific color for colored substances, otherwise the chosen
# class. When a name matches several colors, the one later in `all_colors`
# wins.
classified_dt[, new_class := chosen_class]

for (color in all_colors) {
  has_color <- grepl(color_pattern(color), classified_dt$name,
                     ignore.case = TRUE)
  classified_dt$new_class[has_color] <- color
}

# how many of each specific color
classified_dt[main_class == "Colors", .N, by = .(main_class, new_class)]

# chemical x class membership table
M.class2 <- as.data.frame.matrix(table(classified_dt$dsstox_substance_id,
                                       classified_dt$main_class))

res2 <- RunFisherTests(M.hitc2, M.class2[, "Colors", drop = FALSE], "group")
names(res2) <- aenm.abbrevs

# prepend an `assay` column to each result table; rep() keeps this valid for
# a 0-row table
res2 <- Map(function(df, name) {
  df$assay <- rep(name, nrow(df))
  df[, c("assay", colnames(df)[colnames(df) != "assay"])]
}, res2, names(res2))

all_res <- bind_rows(res2)

Results for colors vs. non-colors

For all assays
assay Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
DIO1 0.2678900 1.690450 0.5613094 4.278143 0.2678900 Colors 6 25 240 1691
DIO2 0.0007306 3.701625 1.6494181 8.082131 0.0007306 Colors 13 18 315 1616
DIO3 0.0029699 3.177040 1.3917025 6.972719 0.0029699 Colors 12 19 320 1611
IYD 0.0000067 6.353009 2.7948141 14.119985 0.0000067 Colors 13 17 208 1731

Venn Diagrams show results before and after applying a “selectivity” filter.

Color-specific classes

# chemical x class membership table using the color-specific classes
M.class3 <- as.data.frame.matrix(
  with(classified_dt, table(dsstox_substance_id, new_class))
)

# test each specific color against the hit-call matrix
res3 <- setNames(
  RunFisherTests(M.hitc2, M.class3[, all_colors], "group"),
  aenm.abbrevs
)

DIO1

For DIO1
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.1671538 3.5045293 0.3154710 24.601313 0.6686153 Orange 2 4 244 1712
0.4885849 1.7463273 0.0353301 17.735712 0.9771698 Blue 1 4 245 1712
1.0000000 1.1631568 0.0251881 9.649539 1.0000000 Red 1 6 245 1710
1.0000000 0.9965032 0.0220218 7.810076 1.0000000 Yellow 1 7 245 1709

DIO2

For DIO2
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0355920 7.520028 0.8579640 90.49250 0.1423681 Blue 3 2 325 1632
0.0961341 3.757931 0.5479218 22.32509 0.1805402 Red 3 4 325 1630
0.1354052 3.005085 0.4643878 15.52862 0.1805402 Yellow 3 5 325 1629
0.2643525 2.498471 0.2251265 17.51444 0.2643525 Orange 2 4 326 1630

DIO3

For DIO3
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0644797 4.939448 0.6586628 37.039787 0.1980649 Orange 3 3 329 1627
0.0990325 3.703190 0.5399719 21.998725 0.1980649 Red 3 4 329 1626
0.2010154 3.284110 0.2733031 28.790320 0.2680205 Blue 2 3 330 1627
0.6297011 1.639916 0.1611829 9.222986 0.6297011 Yellow 2 6 330 1624

IYD

For IYD
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0002965 20.142792 3.2738241 212.224944 0.0011859 Red 5 2 216 1746
0.0019318 16.046253 2.2845934 178.150291 0.0038636 Blue 4 2 217 1746
0.0997871 5.304231 0.4407893 46.544510 0.1330495 Orange 2 3 219 1745
1.0000000 0.000000 0.0000000 5.503411 1.0000000 Yellow 0 7 221 1741
# colors followed up with Venn diagrams
sig.colors <- c("Red", "Blue")

# hit calls joined to the color-specific classes, from `long_hitc_final`
# (used for the "before selectivity filter" plots)
new.merged.colors2 <- merge.data.table(
  long_hitc_final,
  classified_dt[, .(dsstox_substance_id, class_type, new_class)],
  by = "dsstox_substance_id",
  all.x = TRUE
)

# same join from `long_hitc_final2` (used for the "after selectivity filter"
# plots)
new.merged.colors2.sel <- merge.data.table(
  long_hitc_final2,
  classified_dt[, .(dsstox_substance_id, class_type, new_class)],
  by = "dsstox_substance_id",
  all.x = TRUE
)

# before selectivity filter
sig.iyd.colors <- lapply(sig.colors, display_venn,
                         hitc.class.dat = new.merged.colors2,
                         class_var = "new_class")
names(sig.iyd.colors) <- sig.colors

# combine the plots into one figure
ggarrange(plotlist = sig.iyd.colors,
          labels = names(sig.iyd.colors),
          font.label = list(size = 20),
          nrow = 1, ncol = 2)

# after selectivity filter
sig.iyd.colors2 <- lapply(sig.colors, display_venn,
                          hitc.class.dat = new.merged.colors2.sel,
                          class_var = "new_class")
names(sig.iyd.colors2) <- sig.colors

# combine the plots into one figure
ggarrange(plotlist = sig.iyd.colors2,
          labels = names(sig.iyd.colors2),
          font.label = list(size = 20),
          nrow = 1, ncol = 2)

Venn Diagrams show results before and after applying a “selectivity” filter.

Registered classes

# Re-derive new_class by registration type: names starting with "D&C" or
# "FD&C" become "(F)D&C"; names containing "C.I." become "C.I." (assigned
# last, so it wins when both apply).
classified_dt[, new_class := chosen_class]
classified_dt[grep("^[F]*D&C", name), new_class := "(F)D&C"]
# fixed = TRUE: match the literal string "C.I."; as a regex the dots would
# match any character
classified_dt[grep("C.I.", name, fixed = TRUE), new_class := "C.I."]

# chemical x class membership table
M.class4 <- as.data.frame.matrix(table(classified_dt$dsstox_substance_id,
                                       classified_dt$new_class))

res4 <- RunFisherTests(M.hitc2, M.class4[, c("(F)D&C", "C.I.")], "group")
names(res4) <- aenm.abbrevs

DIO1

DIO1: For registered classes
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.3311025 3.494327 0.0590515 67.456250 0.345855 (F)D&C 1 2 245 1714
0.3458550 1.558808 0.3805507 4.786131 0.345855 C.I. 4 18 242 1698

DIO2

DIO2: For registered classes
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.4225349 2.493889 0.0421695 48.025744 0.4225349 (F)D&C 1 2 327 1632
0.2429139 1.883513 0.5989874 5.115433 0.4225349 C.I. 6 16 322 1618

DIO3

DIO3: For registered classes
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0802724 2.317707 0.7929122 6.096793 0.1605447 C.I. 7 15 325 1615
0.4267680 2.457913 0.0415593 47.330155 0.4267680 (F)D&C 1 2 331 1628

IYD

IYD: For registered classes
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0348474 15.909106 0.8250675 935.453717 0.0348474 (F)D&C 2 1 219 1747
0.0186756 3.453165 1.0759729 9.689231 0.0348474 C.I. 6 14 215 1734

Venn Diagrams show results before and after applying a “selectivity” filter.

ToxPrint Enrichment

load ToxPrints

# read the ToxPrint table for the DIO/IYD test library
toxprints <- fread(
  "./data/DIO_IYD_testlib_invitrodb_v3_5_input_toxprints.csv"
)

# how many chems don't have ToxPrints? (rows with an empty `cid`)
head(toxprints[cid == ""])
toxprints[cid == "", .N]
## [1] 82

82 chemicals did not have ToxPrints because they were either UVCBs or Markush structures.

form matrices

# subset to available ToxPrints 
chemotype.cols <- colnames(toxprints)[colnames(toxprints) %notin% c("DTXSID", "smiles", "cid","M_COMPOUND_HISTORY_[STRING]","M_CORINA_SYMPHONY_ERRORS_[STRING]")]
M.chemotype.dt <- toxprints[cid != "", c("DTXSID", ..chemotype.cols)]

# add chemical names and classifications
M.chemotype.dt$name <- classified_dt$name[match(M.chemotype.dt$DTXSID, classified_dt$dsstox_substance_id)]
M.chemotype.dt$chosen_class <- classified_dt$chosen_class[match(M.chemotype.dt$DTXSID, classified_dt$dsstox_substance_id)]
M.chemotype.dt <- M.chemotype.dt[, c("DTXSID", "name", "chosen_class", chemotype.cols), with = FALSE]

M.chemotype <- as.data.frame(M.chemotype.dt[, c("DTXSID", ..chemotype.cols)])
rownames(M.chemotype) <- M.chemotype$DTXSID
M.chemotype <- M.chemotype[, -1]

M.hitc.tp <- M.hitc2[rownames(M.chemotype),]

# check that rows are ordered the same
identical(rownames(M.hitc.tp), rownames(M.chemotype))
## [1] TRUE

run tests

# Run the enrichment tests for every ToxPrint chemotype column against the
# hit-call matrix restricted to chemicals with ToxPrints.
# NOTE(review): presumably returns one result table per assay, in the same
# order as `aenm.abbrevs`, like the other RunFisherTests() calls in this file
# — confirm against the RunFisherTests() definition.
tp.res <- RunFisherTests(M.hitc.tp, M.chemotype, "group")

significant for DIO1

For DIO1
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0000003 6.9669239 3.2687022 14.7167791 0.0000514 bond:CX_halide_alkyl-F_perfluoro_octyl 16 18 209 1641
0.0000002 3.4984119 2.1773331 5.5215907 0.0000514 chain:alkaneLinear_octyl_C8 32 75 193 1584
0.0000020 5.6873891 2.7449749 11.5462980 0.0001397 chain:alkaneLinear_dodedyl_C12 16 22 209 1637
0.0000021 3.9012815 2.2114203 6.7149118 0.0001397 bond:S~N_generic 23 47 202 1612
0.0000128 3.0928471 1.8599708 5.0219248 0.0006373 bond:CS_sulfide 27 70 198 1589
0.0000234 Inf 6.8428548 Inf 0.0010354 bond:CC(=O)C_ketone_alkene_cyclic_3-en-1-one 5 0 220 1659
0.0000278 17.6687140 3.9985962 106.8452637 0.0011075 ring:hetero_[6_6]O_benzopyrone(1_4-) 7 3 218 1656
0.0000407 3.7054775 1.9526794 6.7980594 0.0014777 chain:alkaneLinear_decyl_C10 18 38 207 1621
0.0000458 5.0181293 2.2586326 10.7814549 0.0015241 bond:C(Z)C~Q_a-halocarbonyl 13 20 212 1639
0.0000780 7.6238012 2.6496526 21.9361668 0.0023944 bond:COH_alcohol_alkene_cyclic 9 9 216 1650
0.0001330 6.8576442 2.4360529 19.0101566 0.0035369 bond:COH_alcohol_alkene 9 10 216 1649
0.0001330 6.8576442 2.4360529 19.0101566 0.0035369 chain:alkaneLinear_tetradecyl_C14 9 10 216 1649
0.0001818 3.1887244 1.7006498 5.7586006 0.0045340 bond:CC(=O)C_ketone_aromatic_aliphatic 18 44 207 1615
0.0005295 4.2075966 1.7947711 9.3625444 0.0111188 chain:alkeneLinear_mono-ene_2-hexene 11 20 214 1639
0.0006106 0.2546282 0.0804518 0.6190398 0.0121817 bond:COH_alcohol_pri-alkyl 5 136 220 1523
0.0009620 12.5130686 2.4156834 81.1098542 0.0159968 chain:alkeneLinear_diene_1_3-butene 5 3 220 1656
0.0009620 12.5130686 2.4156834 81.1098542 0.0159968 chain:aromaticAlkane_Ph-C6 5 3 220 1656
0.0011563 3.0760336 1.5030017 5.9793331 0.0177449 chain:alkeneLinear_mono-ene_ethylene 14 35 211 1624
0.0012792 5.0526836 1.7704970 13.6213554 0.0189035 bond:COC_ether_alkenyl 8 12 217 1647
0.0020777 3.1636597 1.4476363 6.5077946 0.0259066 chain:alkeneLinear_diene_1_2-butene 12 29 213 1630
0.0021747 3.3561406 1.4687740 7.1928148 0.0262943 bond:P~S_generic 11 25 214 1634
0.0035329 7.5035291 1.7122854 32.8867893 0.0369759 bond:quatN_alkyl_acyclic 5 5 220 1654
0.0035580 5.6451779 1.5985659 18.7493122 0.0369759 bond:P=O_phosphate_thioate 6 8 219 1651
0.0036142 4.0362527 1.4639427 10.2827353 0.0369759 ring:hetero_[6_6]_O_benzopyran 8 15 217 1644

DIO2

For DIO2
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0000000 3.8791047 2.7102386 5.5241299 0.0000000 chain:alkeneLinear_mono-ene_ethylene_generic 64 102 239 1479
0.0000000 8.2583641 4.2222558 16.5282448 0.0000000 chain:alkeneLinear_diene_1_2-butene 25 17 278 1564
0.0000000 7.3806821 3.8454275 14.3890051 0.0000000 chain:alkeneLinear_mono-ene_allyl 25 19 278 1562
0.0000000 6.6162143 3.5945830 12.2726339 0.0000000 chain:alkeneLinear_mono-ene_ethylene 27 23 276 1558
0.0000000 8.0202751 3.9998120 16.4658296 0.0000001 chain:aromaticAlkene_Ph-C2_acyclic_generic 23 16 280 1565
0.0000000 10.0682544 4.5406267 23.5249928 0.0000001 chain:alkeneLinear_mono-ene_2-hexene 20 11 283 1570
0.0000001 3.2840940 2.1091317 5.0572028 0.0000058 chain:alkaneLinear_octyl_C8 39 68 264 1513
0.0000001 5.5403773 2.8831734 10.6483908 0.0000061 chain:alkeneBranch_mono-ene_2-butene 22 22 281 1559
0.0000002 7.0575868 3.2746823 15.5153426 0.0000079 chain:aromaticAlkene_Ph-C2 18 14 285 1567
0.0000009 5.4924565 2.7144441 11.1123079 0.0000371 chain:alkaneLinear_dodedyl_C12 19 19 284 1562
0.0000058 3.4362598 1.9876198 5.8443342 0.0001916 chain:aromaticAlkane_Ar-C-Ar 26 42 277 1539
0.0000190 4.8349690 2.2774124 10.1787384 0.0005411 bond:CX_halide_alkyl-F_perfluoro_octyl 16 18 287 1563
0.0000190 3.5589012 1.9517783 6.3703069 0.0005411 chain:alkaneLinear_decyl_C10 22 34 281 1547
0.0000352 7.3946512 2.6829540 21.3689856 0.0009374 chain:alkaneLinear_tetradecyl_C14 11 8 292 1573
0.0000583 4.5174117 2.0930704 9.6143457 0.0013681 bond:C(Z)C~Q_a-halocarbonyl 15 18 288 1563
0.0000622 3.1618973 1.7775869 5.5076528 0.0013796 chain:aromaticAlkane_Ph-C1-Ph 23 40 280 1541
0.0001312 8.0226137 2.5280844 27.5934363 0.0026167 chain:aromaticAlkene_Ph-C2_styrene 9 6 294 1575
0.0003474 15.9139244 2.8280562 161.7999170 0.0057755 chain:alkeneLinear_diene_1_3-butene 6 2 297 1579
0.0006580 Inf 3.4665591 Inf 0.0100971 chain:alkeneLinear_diene_1_4-diene 4 0 299 1581
0.0025919 0.0000000 0.0000000 0.5229921 0.0287271 chain:oxy-alkaneLinear_ethyleneOxide_EO1 0 38 303 1543
0.0028712 21.0798358 2.0770524 1035.1553444 0.0293747 chain:aromaticAlkane_Ph-C8 4 1 299 1580
0.0028712 21.0798358 2.0770524 1035.1553444 0.0293747 ring:hetero_[5]_N_O_oxazole 4 1 299 1580
0.0041254 Inf 2.1644404 Inf 0.0299278 bond:metal_group_III_other_generic_oxy 3 0 300 1581
0.0041254 Inf 2.1644404 Inf 0.0299278 bond:metal_group_III_other_Sn_oxy 3 0 300 1581
0.0038371 8.8077550 1.7036013 57.0336927 0.0299278 atom:element_metal_poor_metal 5 3 298 1578
0.0038371 8.8077550 1.7036013 57.0336927 0.0299278 bond:metal_group_III_other_generic 5 3 298 1578
0.0038371 8.8077550 1.7036013 57.0336927 0.0299278 bond:metal_group_III_other_Sn_generic 5 3 298 1578
0.0038371 8.8077550 1.7036013 57.0336927 0.0299278 bond:metal_group_III_other_Sn_organo 5 3 298 1578
0.0038371 8.8077550 1.7036013 57.0336927 0.0299278 chain:aromaticAlkane_Ph-C6 5 3 298 1578
0.0037180 6.3579572 1.6052026 26.5197275 0.0299278 bond:C=N_imine_C(connect_H_gt_0) 6 5 297 1576
0.0052633 0.1879874 0.0220827 0.7196057 0.0355943 bond:NC=O_urea_generic 2 54 301 1527
0.0060654 0.0000000 0.0000000 0.6292167 0.0396738 ring:hetero_[6]_Z_1_3_5- 0 32 303 1549
0.0075062 6.6045819 1.4125828 33.4729929 0.0467966 chain:alkaneLinear_hexadecyl_C16 5 4 298 1577

DIO3

For DIO3
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0000000 9.979855 5.049863 20.4549217 0.0000000 chain:alkeneLinear_diene_1_2-butene 27 15 281 1561
0.0000000 9.730911 5.011287 19.5230411 0.0000000 chain:alkeneLinear_mono-ene_allyl 28 16 280 1560
0.0000000 8.380412 4.531345 15.8093851 0.0000000 chain:alkeneLinear_mono-ene_ethylene 30 20 278 1556
0.0000000 3.563973 2.486190 5.0811820 0.0000000 chain:alkeneLinear_mono-ene_ethylene_generic 62 104 246 1472
0.0000000 9.410691 4.611044 19.9305420 0.0000000 chain:alkaneLinear_dodedyl_C12 24 14 284 1562
0.0000000 3.829697 2.482839 5.8620383 0.0000001 chain:alkaneLinear_octyl_C8 43 64 265 1512
0.0000000 4.250222 2.641912 6.7905069 0.0000001 chain:alkeneCyclic_ethene_C_(connect_noZ) 37 49 271 1527
0.0000000 5.521004 3.099257 9.8370451 0.0000001 chain:alkaneLinear_decyl_C10 28 28 280 1548
0.0000004 5.682822 2.839648 11.4140247 0.0000133 chain:aromaticAlkene_Ph-C2_acyclic_generic 20 19 288 1557
0.0000008 6.507859 2.982385 14.4385998 0.0000277 chain:alkeneLinear_mono-ene_2-hexene 17 14 291 1562
0.0000023 3.359957 2.021174 5.5083548 0.0000714 chain:alkeneCyclic_diene_cyclohexene 30 49 278 1527
0.0000051 9.068142 3.260741 27.4159650 0.0001449 chain:alkaneLinear_tetradecyl_C14 12 7 296 1569
0.0000088 5.335365 2.467222 11.5386125 0.0002347 chain:aromaticAlkene_Ph-C2 16 16 292 1560
0.0000186 4.074418 2.091517 7.8191944 0.0004633 chain:alkeneBranch_mono-ene_2-butene 19 25 289 1551
0.0000203 36.548549 4.665020 1636.6100700 0.0004756 chain:alkeneLinear_diene_1_3-butene 7 1 301 1575
0.0000249 3.485457 1.911965 6.2378162 0.0005512 bond:CC(=O)C_ketone_alkene_generic 22 34 286 1542
0.0000711 4.426161 2.051136 9.4207576 0.0013514 bond:C(Z)C~Q_a-halocarbonyl 15 18 293 1558
0.0001136 Inf 4.731093 Inf 0.0020608 bond:CC(=O)C_ketone_alkene_cyclic_3-en-1-one 5 0 303 1576
0.0001498 7.860822 2.478289 27.0567197 0.0025981 chain:aromaticAlkene_Ph-C2_styrene 9 6 299 1570
0.0002420 5.262442 2.048169 13.5262546 0.0040226 bond:C=O_carbonyl_ab-unsaturated_aliphatic_(michael_acceptors) 11 11 297 1565
0.0002747 5.834320 2.110606 16.3759101 0.0043848 ring:hetero_[6]N_triazine(1_3_5-) 10 9 298 1567
0.0003075 3.898234 1.786992 8.3032824 0.0047195 bond:CN_amine_sec-NH_aromatic_aliphatic 14 19 294 1557
0.0006275 4.447916 1.785643 10.8762554 0.0089419 bond:CC(=O)C_ketone_alkene_cyclic_(C6) 11 13 297 1563
0.0015337 0.000000 0.000000 0.5127945 0.0185440 chain:oxy-alkaneLinear_ethyleneOxide_EO1 0 38 308 1538
0.0030590 20.668647 2.036520 1015.0463315 0.0332759 bond:CS_sulfide_di- 4 1 304 1575

IYD

For IYD
Pval Odds_Ratio Lower Upper padj group Actives in group Inactives in group Actives not in group Inactives not in group
0.0000000 4.9942140 3.3304177 7.4245940 0.0000000 chain:alkaneLinear_hexyl_C6 48 101 151 1589
0.0000000 4.9562291 3.1227718 7.7620942 0.0000000 chain:alkaneLinear_octyl_C8 36 72 163 1618
0.0000000 3.6240711 2.5012830 5.2048035 0.0000000 bond:COH_alcohol_aromatic_phenol 55 161 144 1529
0.0000000 3.5271137 2.4363760 5.0612674 0.0000000 bond:COH_alcohol_aromatic 55 165 144 1525
0.0000000 9.8027077 4.8655341 19.8175449 0.0000000 chain:alkaneLinear_dodedyl_C12 20 19 179 1671
0.0000000 5.8674615 3.2031085 10.5518504 0.0000009 chain:alkaneLinear_decyl_C10 22 35 177 1655
0.0000002 14.0262807 4.8981213 43.2143635 0.0000114 chain:alkaneLinear_tetradecyl_C14 11 7 188 1683
0.0000004 4.7385049 2.5987761 8.4193362 0.0000179 chain:aromaticAlkane_Ph-C1-Ph 21 41 178 1649
0.0000010 61.1914290 7.8098323 2730.6008204 0.0000380 chain:aromaticAlkane_Ph-C6 7 1 192 1689
0.0000018 4.2114705 2.3311105 7.3932555 0.0000637 chain:aromaticAlkane_Ar-C-Ar 21 46 178 1644
0.0000119 20.4316996 4.6182487 123.0511490 0.0003382 bond:CX_halide_alkyl-X_bicyclo[2_2_1]heptene 7 3 192 1687
0.0000228 11.7242160 3.5253028 41.4637390 0.0005675 chain:aromaticAlkane_Ph-C4 8 6 191 1684
0.0000386 6.1108167 2.5244242 14.2660852 0.0009071 bond:QQ(Q~O_S)_sulfur_oxide 11 16 188 1674
0.0000484 7.9396561 2.8168589 22.0472001 0.0010725 bond:CX_halide_alkenyl-X_dihalo_(1_2-) 9 10 190 1680
0.0000910 4.8584283 2.1547050 10.4480707 0.0018148 bond:CX_halide_alkyl-F_perfluoro_octyl 12 22 187 1668
0.0000867 3.1408561 1.7474562 5.4464545 0.0018148 bond:CX_halide_alkyl-F_perfluoro_hexyl 20 58 179 1632
0.0001454 3.5128502 1.8025456 6.5501727 0.0026369 bond:S(=O)O_sulfonicAcid_generic 16 41 183 1649
0.0003470 3.0609374 1.6195649 5.5354168 0.0046674 bond:S(=O)O_sulfonate 17 50 182 1640
0.0004588 3.6174435 1.7109027 7.2369633 0.0059047 ring:fused_[6_6]_naphthalene 13 32 186 1658
0.0005497 34.4808487 3.3930421 1688.9476331 0.0062669 chain:aromaticAlkane_Ph-C8 4 1 195 1689
0.0005309 14.4502204 2.7871849 93.7818426 0.0062669 chain:alkaneLinear_hexadecyl_C16 5 3 194 1687
0.0005435 5.8462415 2.0459610 15.7784622 0.0062669 bond:CX_halide_alkyl-X_tertiary 8 12 191 1678
0.0005213 3.8044007 1.7318354 7.8824282 0.0062669 bond:CX_halide_alkyl-Cl_dichloro_(1_1-) 12 28 187 1662
0.0006480 6.7965890 2.1254497 20.7701255 0.0071819 bond:S(=O)O_sulfonicEster_aliphatic_(S-C) 7 9 192 1681
0.0007996 5.3946177 1.9125579 14.2479120 0.0083956 bond:N=N_azo_aromatic 8 13 191 1677
0.0010833 4.0072059 1.6675067 8.9785047 0.0110832 bond:S(=O)O_sulfonicAcid_cyclic_(ring) 10 22 189 1668
0.0011444 5.0068352 1.7954990 12.9822859 0.0114156 bond:N=N_azo_generic 8 14 191 1676
0.0014985 5.5558536 1.8038121 15.9073488 0.0142361 bond:S(=O)O_sulfuricAcid_generic 7 11 192 1679
0.0019995 8.6681161 1.9756255 38.0134631 0.0184841 bond:quatN_alkyl_acyclic 5 5 194 1685
0.0021638 5.0904598 1.6769590 14.2277197 0.0191855 bond:quatN_generic 7 12 192 1678
0.0024617 3.2347852 1.4380324 6.7730879 0.0204628 chain:aromaticAlkene_Ph-C2_acyclic_generic 11 30 188 1660
0.0028093 5.7973342 1.6788471 18.4604929 0.0228757 bond:S(=O)O_sulfonicEster_acyclic_(S-C(ring)) 6 9 193 1681
0.0030329 3.1282333 1.3949072 6.5223950 0.0241085 bond:CX_halide_alkenyl-X_generic 11 31 188 1659
0.0036419 0.3043281 0.0960131 0.7418166 0.0279449 bond:C(=O)O_carboxylicEster_acyclic 5 132 194 1558
0.0052649 6.1855589 1.5326626 22.8882654 0.0389016 bond:C=N_imine_N(connect_noZ) 5 7 194 1683

Update Supplemental File 2

# Label each ToxPrint result table "<target>-CT", then append the ToxPrint
# table itself as one more list element.
tp.res <- setNames(tp.res, paste0(aenm.abbrevs, "-CT"))
tp.res[["chemotypes"]] <- toxprints

# Write `res` followed by the relabelled ToxPrint results to one workbook.
write.xlsx(
  x = c(res, tp.res),
  file = "./tables/Supp2_Class&ToxPrint_Enrichment_AllResults.xlsx"
)

compile results across targets

# Tag every per-target result table with the name of its target and keep only
# the columns used downstream. The list stays named by target.
sig.list <- lapply(setNames(nm = names(sig.res)), function(target.name) {
  tbl <- sig.res[[target.name]]
  tbl$target <- target.name
  tbl[, c("target", "Odds_Ratio", "padj", "group", "Actives in group", "Inactives in group", "Actives not in group", "Inactives not in group")]
})

# Stack the per-target tables into one long data.table; the enrichment
# `group` column holds ToxPrint chemotypes here, so rename it accordingly.
sig.tp.all <- setDT(bind_rows(sig.list))
setnames(sig.tp.all, old = "group", new = "chemotype")

# Pivot the long table to chemotype x target: adjusted p-values first.
# The chemotype column becomes the row names and is then dropped.
M.padj <- dcast(sig.tp.all, chemotype ~ target, value.var = "padj")
M.padj <- as.data.frame(M.padj)
rownames(M.padj) <- M.padj[["chemotype"]]
M.padj <- M.padj[, -1]

# Same pivot for the odds ratios.
M.or <- dcast(sig.tp.all, chemotype ~ target, value.var = "Odds_Ratio")
M.or <- as.data.frame(M.or)
rownames(M.or) <- M.or[["chemotype"]]
M.or <- M.or[, -1]

# both tables must list the chemotypes in the same order
identical(rownames(M.padj), rownames(M.or))
## [1] TRUE
cat("Total number of chemotypes found to be significantly enriched: ", nrow(sig.tp.all))
## Total number of chemotypes found to be significantly enriched:  117
cat("Total number of unique chemotypes found to be significantly enriched: ", length(unique(rownames(M.padj))))
## Total number of unique chemotypes found to be significantly enriched:  77
# enriched for actives = 1; not enriched for actives = 0
# A cell is 1 only when the odds ratio for that chemotype/target is > 3;
# everything else (odds ratio <= 3, or NA because the chemotype has no entry
# for that target) is 0. The mask is taken from M.or in one step so every
# cell ends up exactly 0 or 1. Recoding in place would leave odds ratios
# between 1/3 and 3 as raw values, and the row sums below would then no
# longer count targets.
enriched.for.actives <- !is.na(M.or) & M.or > 3
M.active <- M.or
M.active[] <- 0
M.active[enriched.for.actives] <- 1

# keep chemotypes enriched for actives in at least one target;
# rs.active = number of targets in which each remaining chemotype is enriched
rs.active <- rowSums(M.active)
M.active <- M.active[rs.active > 0, ]
rs.active <- rowSums(M.active)
head(M.active)
cat("Total number of enriched chemotypes for actives:", nrow(M.active))
## Total number of enriched chemotypes for actives: 72
# enriched for inactives = 1; not enriched for inactives = 0
# A cell is 1 only when the odds ratio for that chemotype/target is < 1/3;
# everything else (odds ratio >= 1/3, or NA because the chemotype has no
# entry for that target) is 0. The mask is taken from M.or in one step so
# every cell ends up exactly 0 or 1. Recoding in place would leave odds
# ratios between 1/3 and 3 as raw values, and an odds ratio of exactly 1
# would be indistinguishable from the "enriched" flag.
enriched.for.inactives <- !is.na(M.or) & M.or < 1/3
M.inactive <- M.or
M.inactive[] <- 0
M.inactive[enriched.for.inactives] <- 1

# keep chemotypes enriched for inactives in at least one target;
# rs.inactive = number of targets in which each remaining chemotype is flagged
rs.inactive <- rowSums(M.inactive)
M.inactive <- M.inactive[rs.inactive > 0, ]
rs.inactive <- rowSums(M.inactive)
M.inactive 
cat("Total number of enriched chemotypes for inactives:", nrow(M.inactive))
## Total number of enriched chemotypes for inactives: 5
  • 729 total chemotypes in ToxPrints
  • A total of 117 chemotypes (77 unique chemotypes) were found to be significant across the assays.
    • 72 chemotypes were found to be enriched in the active space.
    • 5 chemotypes were found to be enriched in the inactive space.

pan-active for DIO1/2/3/IYD

# chemotypes flagged for actives in all four targets
subset(M.active, rs.active == 4)

pan-active for 3 targets

# chemotypes flagged in exactly three targets
subset(M.active, rs.active == 3)

pan-active for 2 targets

# chemotypes flagged in exactly two targets
subset(M.active, rs.active == 2)

active just for DIO1

M.active[with(M.active, DIO1 == 1 & DIO2 == 0 & DIO3 == 0 & IYD == 0), ]

active just for DIO2

M.active[with(M.active, DIO1 == 0 & DIO2 == 1 & DIO3 == 0 & IYD == 0), ]

active just for DIO3

M.active[with(M.active, DIO1 == 0 & DIO2 == 0 & DIO3 == 1 & IYD == 0), ]

active just for IYD

M.active[with(M.active, DIO1 == 0 & DIO2 == 0 & DIO3 == 0 & IYD == 1), ]

Pan-Activity

calculate selectivity

# to select for the most selective hits for each ToxPrints enrichment case

# tag each row of mc.deiod with the short label of its assay endpoint
for (i in seq_along(h.deiod.aenm)) {
  mc.deiod[aenm == h.deiod.aenm[i], assay := names(h.deiod.aenm)[i]]
}

# one row per chemical, one modl_acc column per target
acc_wide <- dcast(mc.deiod, dsstox_substance_id + chnm ~ assay, value.var = "modl_acc")
setDT(acc_wide)

# Rename the target columns by name rather than by position, so that a
# missing target column raises an error instead of silently mislabelling the
# columns next to it.
setnames(acc_wide, old = aenm.abbrevs, new = paste0(aenm.abbrevs, "_acc"))

# attach the cytotoxicity median; chemicals without one (not found in
# `cytotox`, or NA there) fall back to 3
# NOTE(review): 3 is presumably a default upper bound on the same log scale
# as modl_acc -- confirm
acc_wide[, cytomed := cytotox$cytotox_median_log[match(dsstox_substance_id, cytotox$dsstox_substance_id)]]
acc_wide[is.na(cytomed), cytomed := 3]

# selectivity per target = cytotoxicity median minus the target's acc,
# with the acc capped at 5
for (ae in aenm.abbrevs) {
  set(acc_wide,
      j = paste0(ae, "_selectivity"),
      value = acc_wide[["cytomed"]] - pmin(acc_wide[[paste0(ae, "_acc")]], 5))
}

# best (largest) selectivity across the targets, ignoring untested targets
acc_wide[, max_selectivity := do.call(pmax, c(.SD, list(na.rm = TRUE))),
         .SDcols = paste0(aenm.abbrevs, "_selectivity")]

Figure 5 of Truong et al. 

# chemotypes flagged for actives in all four targets
sig.chemotypes <- rownames(M.active)[rs.active == 4]

# ToxPrint columns for those chemotypes, with DTXSID moved to the row names
tp.sub <- as.data.frame(toxprints[, c("DTXSID", sig.chemotypes), with = FALSE])
rownames(tp.sub) <- tp.sub[["DTXSID"]]
tp.sub <- tp.sub[, -1, drop = FALSE]
tp.sub <- na.omit(tp.sub)

# get chems that have at least one of the enriched chemotypes 
longchainc.chems <- rownames(tp.sub)[rowSums(tp.sub) > 0]

# plot the chems with available selectivity (had to be tested in mc)
# most of these are positive but not all 
sel.data <- acc_wide[dsstox_substance_id %in% longchainc.chems, 
                     c("dsstox_substance_id", "chnm", "DIO1_acc", "DIO2_acc",
                       "DIO3_acc", "IYD_acc", "cytomed", "max_selectivity"),
                     with = FALSE]

# long format: one row per chemical and measure (four accs + cytomed)
sel.m <- melt(sel.data, 
              measure.vars = c(paste0(aenm.abbrevs, "_acc"), "cytomed"), 
              variable.name = "assay", 
              value.name = "acc")

# drop untested combinations, then sort most selective first
sel.m <- sel.m[!is.na(acc)][order(-max_selectivity, -chnm)]

# standardize themes 
my_theme <- theme_bw() +
  theme(
    axis.text = element_text(size = 10),
    legend.text = element_text(size = 10)
  )

# SELECTIVITY DOTPLOT
# points: accs as filled circles, cytomed as an asterisk
dotplot <- ggplot(sel.m, aes(x = reorder(chnm, max_selectivity), y = acc)) +
  geom_point(aes(color = assay, shape = assay), alpha = 0.8, size = 2.5) +
  scale_shape_manual(values = c("DIO1_acc" = 16, "DIO2_acc" = 16, 
                                "DIO3_acc" = 16, "IYD_acc" = 16,
                                "cytomed" = 8))

# red band over the chemicals whose best selectivity is below 0.3
dotplot <- dotplot +
  annotate("rect",
           xmin = sel.m[max_selectivity < 0.3, chnm][1],
           xmax = tail(sel.m[max_selectivity < 0.3, chnm], n = 1),
           ymin = floor(min(sel.m$acc)),
           ymax = ceiling(max(sel.m$acc)),
           alpha = .1, fill = "red")

dotplot <- dotplot +
  my_theme + 
  labs(x = 'Chemical', 
       y = bquote(Concentration ~ at ~ "Threshold ("*log[10]~ mu*"M)")) +
  coord_flip()

# how many of these chems were tested within each assay?
totals.by.assay <- merged_dat[
  dsstox_substance_id %in% longchainc.chems,
  .(total = uniqueN(dsstox_substance_id)),
  by = "assay"
]

# per-assay list of the tested chemicals carrying the chemotype(s),
# named by target abbreviation
testedChems.by.assay <- lapply(setNames(nm = aenm.abbrevs), function(ae) {
  merged_dat[assay == ae & dsstox_substance_id %in% longchainc.chems, dsstox_substance_id]
})

# VENN DIAGRAM OF SUBSTANCES WITH CHEMOTYPE THAT WERE TESTED
venn_obj <- ggvenn(
  testedChems.by.assay,
  show_percentage = FALSE,
  fill_color = c("#999999", "#E69F00", "#56B4E9", "#009E73"),
  stroke_linetype = "blank",
  text_size = 4,
  set_name_size = 3.8
)
# fix labels cropped at the top with this ggplot trick
venn_obj <- venn_obj + scale_y_continuous(expand = expansion(mult = .1))

# class x hitc distribution of these chems 
# number of distinct chemicals per assay / class, separately for each hit call
actives <- merged_dat[dsstox_substance_id %in% longchainc.chems & hitc == 1, 
  .(count = uniqueN(dsstox_substance_id)), 
  by = .(assay, class_type, chosen_class)]
actives[, hitcall := "active"]

inactives <- merged_dat[dsstox_substance_id %in% longchainc.chems & hitc == 0, 
  .(count = uniqueN(dsstox_substance_id)), 
  by = .(assay, class_type, chosen_class)]
inactives[, hitcall := "inactive"]

class.hitc.dist <- setDT(bind_rows(actives, inactives))

# add stats pertaining to each assay
# seq_along() keeps the loop a no-op if the assay vector is ever empty
for (i in seq_along(h.deiod.aenm)) {
  class.hitc.dist[assay == aenm.abbrevs[i], `:=` (
    Nactives = hitc.dist[aenm == h.deiod.aenm[i] & hitc == 1, numChems], # total num of actives in that assay
    Ninactives = hitc.dist[aenm == h.deiod.aenm[i] & hitc == 0, numChems], # total num of inactives in that assay
    Nchemotypes = totals.by.assay[assay == aenm.abbrevs[i], total] # total num of chems described by the chemotypes of interest in each assay
  )]
}

# pct: share of the assay's actives (or inactives) that fall in this class;
# pct_of_Nchemotypes: share of the tested chemicals carrying the chemotype(s)
class.hitc.dist[hitcall == "active", `:=` (
  pct = (count / Nactives) * 100, 
  pct_of_Nchemotypes = (count / Nchemotypes) * 100
)]

class.hitc.dist[hitcall == "inactive", `:=` (
  pct = (count / Ninactives) * 100, 
  pct_of_Nchemotypes = (count / Nchemotypes) * 100
)]
class.hitc.dist <- class.hitc.dist[order(-pct_of_Nchemotypes), .SD, keyby = .(assay, hitcall)]

# CLASS DISTRIBUTION OF CHEMS WITH CHEMOTYPE(S)
barchart <- ggplot(class.hitc.dist, 
       aes(x = reorder(chosen_class, pct_of_Nchemotypes),  
           y = pct_of_Nchemotypes, fill = chosen_class)) +
  geom_bar(stat = "identity") +
  my_theme +
  theme(legend.position = "none", 
        plot.title.position = "plot", 
        plot.title = element_text(hjust = 0.15, vjust = 0.6)) +
  coord_flip() +
  facet_grid(hitcall ~ assay, scales = "free_y") +
  labs(x = "Class", 
       y = "% of Chemicals with Given Chemotype(s) found Active or Inactive for Target", 
       title = "Class Distribution of Chemicals with chain:alkaneLinear_C(8,10,12,14) chemotype")

# per assay: summed pct over all classes for the active rows
class.hitc.dist[hitcall == "active", total_pct := sum(pct), by = assay]

# one row per assay holding that total (the "in group" slice) ...
pct.of.actives.per.assay <- unique(class.hitc.dist[!is.na(total_pct), .(total_pct, assay)])
pct.of.actives.per.assay[, in_group := TRUE]

# ... and its complement to 100 (the "not in group" slice)
other.actives <- mutate(
  arrange(pct.of.actives.per.assay, assay),
  total_pct = 100 - total_pct,
  in_group = FALSE
)

active.pie <- rbind(pct.of.actives.per.assay, other.actives)

# compute the position of labels
active.pie <- mutate(
  arrange(active.pie, assay, desc(in_group)),
  ypos = cumsum(total_pct) - 0.5 * total_pct
)

# % OF ACTIVES AS A PIECHART 
# only the in-group slice is labelled with its rounded percentage
piechart <- ggplot(active.pie, aes(x = "", y = total_pct, fill = in_group)) +
  geom_bar(stat = "identity", width = 1, color = "black") +
  coord_polar("y", start = 0) +
  ylim(0, 100) +
  scale_fill_discrete(
    name = "",
    labels = c("Actives not represented by chemotypes", "Actives represented by chemotypes")
  ) +
  facet_wrap(~ assay, nrow = 1) +
  geom_text(
    aes(x = 1.25, label = ifelse(in_group, paste0(round(total_pct), "%"), "")),
    color = "black",
    size = 13/.pt,
    position = position_stack(vjust = 0.5)
  ) +
  theme_void() +
  theme(
    legend.position = "bottom",
    legend.text = element_text(size = 12),
    text = element_text(size = 14),
    plot.margin = unit(c(1, 0.5, 0, 0.5), "lines")
  )

# top row: pie charts and Venn diagram, separated by a narrow empty spacer
aplusb <- plot_grid(
  piechart, NULL, venn_obj,
  nrow = 1,
  rel_widths = c(0.7, 0.05, 0.3),
  rel_heights = c(1, 1, 1),
  labels = c("A", "B"),
  label_size = 20,
  label_x = 0,
  label_y = 0.8
)

# bottom part: bar chart stacked above the dot plot
cplusd <- plot_grid(
  barchart, dotplot,
  nrow = 2,
  rel_widths = c(1, 0.5),
  rel_heights = c(1.4, 1),
  labels = c("C", "D"),
  label_size = 20,
  label_x = 0,
  label_y = 1
)

# full figure: top row takes 20% of the height
fig <- plot_grid(
  aplusb, cplusd,
  nrow = 2,
  rel_widths = c(1, 1),
  rel_heights = c(0.2, 0.8)
)

fig

# write the figure as TIFF and as PNG with identical size and resolution
ggsave(filename = "./figures/300dpi/longchainc_panactive.tiff",
       plot = fig,
       device = "tiff",
       width = 10, height = 19,
       units = "in",
       dpi = 300)

ggsave(filename = "./figures/longchainc_panactive.png",
       plot = fig,
       device = "png",
       width = 10, height = 19,
       units = "in",
       dpi = 300)

# save to RData for later processing 
# save(longchainc.chems,
#      M.hitc,
#      classified_dt,
#      file = "./data/invitrodb_v3_5_deiod_Enrichment.RData")

function to make plots for each ToxPrint grouping

# Build the four-panel summary figure for one group of ToxPrint chemotypes.
#
# Panels: (A) pie chart per assay of the summed `pct` of actives (or
# inactives) carrying at least one of the chemotypes; (B) Venn diagram of the
# chemicals with the chemotype(s) that appear in each assay; (C) class
# distribution of those chemicals by hit call and assay; (D) dot plot of the
# *_acc values and cytomed per chemical.
#
# Reads these objects from the enclosing environment: toxprints, acc_wide,
# merged_dat, hitc.dist, aenm.abbrevs and h.deiod.aenm.
#
# Args:
#   chemotypes: character vector of ToxPrint column names in `toxprints`.
#   piechart.actives: TRUE (default) to summarise actives in the pie chart,
#     FALSE to summarise inactives.
#
# Returns:
#   The combined figure assembled with plot_grid().
make_plots <- function(chemotypes, piechart.actives = TRUE) {
  tp.sub <- as.data.frame(toxprints[, c("DTXSID", ..chemotypes)])
  rownames(tp.sub) <- tp.sub$DTXSID
  tp.sub <- tp.sub[, -1, drop = FALSE]
  tp.sub <- na.omit(tp.sub)
  
  # get chems that have at least one of the enriched chemotypes 
  chems <- rownames(tp.sub[rowSums(tp.sub) > 0, , drop = FALSE])
  
  # plot the chems with available selectivity (had to be tested in mc)
  # most of these are positive but not all 
  sel.data <- acc_wide[dsstox_substance_id %in% chems, 
                       .(dsstox_substance_id, chnm, DIO1_acc, DIO2_acc, DIO3_acc, IYD_acc, cytomed, max_selectivity)]
  
  sel.m <- melt.data.table(sel.data, 
                           measure.vars = c(paste0(aenm.abbrevs, "_acc"), "cytomed"), 
                           variable.name = "assay", 
                           value.name = "acc")
  
  sel.m <- sel.m[!is.na(acc)]
  sel.m <- sel.m[order(-max_selectivity, -chnm)]
  
  # standardize themes 
  my_theme <- theme_bw() +
    theme(axis.text = element_text(size = 10),
          legend.text = element_text(size = 10))
  
  # SELECTIVITY DOTPLOT
  dotplot <- ggplot(data = sel.m, 
         mapping = aes(x = reorder(chnm, max_selectivity), 
                       y = acc)) +
    geom_point(aes(color = assay, shape = assay), 
               alpha = 0.8, 
               size = 2.5) +
    scale_shape_manual(values = c("DIO1_acc" = 16, "DIO2_acc" = 16, 
                                  "DIO3_acc" = 16, "IYD_acc" = 16,
                                  "cytomed" = 8))
  
  # Red band over the chemicals whose best selectivity is below 0.3.
  # Only added when such chemicals exist: with nothing to highlight (which
  # includes an empty sel.m) the band would be empty anyway, and computing
  # its limits from an empty `acc` column raises min()/max() warnings.
  nonselective <- sel.m[max_selectivity < 0.3]
  if (nrow(nonselective) > 0) {
    dotplot <- dotplot +
      annotate("rect", 
               xmin = nonselective[1, chnm],
               xmax = nonselective[.N, chnm],
               ymin = floor(min(sel.m$acc)), ymax = ceiling(max(sel.m$acc)),
               alpha = .1, fill = "red")
  }
  
  dotplot <- dotplot +
    my_theme + 
    labs(x = 'Chemical', y = 'Concentration at Threshold (log uM)') +
    coord_flip()
  
  # how many of these chems were tested within each assay?
  totals.by.assay <- merged_dat[dsstox_substance_id %in% chems, 
                       .(total = length(unique(dsstox_substance_id))), by = assay]
  
  testedChems.by.assay <- lapply(aenm.abbrevs, function(ae) {
    merged_dat[assay == ae & dsstox_substance_id %in% chems, dsstox_substance_id]
  })
  names(testedChems.by.assay) <- aenm.abbrevs
  
  # VENN DIAGRAM OF SUBSTANCES WITH CHEMOTYPE THAT WERE TESTED
  venn_obj <- ggvenn(testedChems.by.assay, 
         show_percentage = FALSE, 
         fill_color = c("#999999", "#E69F00", "#56B4E9", "#009E73"),
         stroke_linetype = "blank", 
         text_size = 4,
         set_name_size = 3.8) +
    scale_y_continuous(expand = expansion(mult = .1))
  
  # class x hitc distribution of these chems 
  actives <- merged_dat[dsstox_substance_id %in% chems & hitc == 1, 
    .(count = length(unique(dsstox_substance_id))), 
    by = .(assay, class_type, chosen_class)]
  actives[, hitcall := "active"]
  
  inactives <- merged_dat[dsstox_substance_id %in% chems & hitc == 0, 
    .(count = length(unique(dsstox_substance_id))), 
    by = .(assay, class_type, chosen_class)]
  inactives[, hitcall := "inactive"]
  
  class.hitc.dist <- setDT(bind_rows(actives, inactives))
  
  # add stats pertaining to each assay
  for (i in seq_along(h.deiod.aenm)) {
    class.hitc.dist[assay == aenm.abbrevs[i], `:=` (
      Nactives = hitc.dist[aenm == h.deiod.aenm[i] & hitc == 1, numChems], # total num of actives in that assay
      Ninactives = hitc.dist[aenm == h.deiod.aenm[i] & hitc == 0, numChems], # total num of inactives in that assay
      Nchemotypes = totals.by.assay[assay == aenm.abbrevs[i], total] # total num of chems described by the chemotypes of interest in each assay
    )]
  }
  
  class.hitc.dist[hitcall == "active", `:=` (pct = (count/Nactives)*100, 
                                      pct_of_Nchemotypes = (count/Nchemotypes)*100
  )]
  
  class.hitc.dist[hitcall == "inactive", `:=` (pct = (count/Ninactives)*100, 
                                      pct_of_Nchemotypes = (count/Nchemotypes)*100
  )]
  class.hitc.dist <- class.hitc.dist[order(-pct_of_Nchemotypes), .SD, keyby = .(assay, hitcall)]
  
  # CLASS DISTRIBUTION OF CHEMS WITH CHEMOTYPE(S)
  barchart <- ggplot(class.hitc.dist, 
         aes(x= reorder(chosen_class, pct_of_Nchemotypes),  
             y=pct_of_Nchemotypes, fill=chosen_class)) +
    geom_bar(stat="identity") +
    my_theme +
    theme(legend.position = "none", 
         plot.title.position = "plot", 
         plot.title = element_text(hjust = 0.15, vjust = 0.6)) +
    coord_flip() +
    facet_grid(hitcall ~ assay, scales = "free_y") +
    labs(x = "Class", 
         y = "% of Chemicals with Given Chemotype(s) found Active or Inactive for Target", 
         title = "Class Distribution of Chemicals with Chemotype(s)")
  
  # per assay: summed pct over all classes, for the hit call shown in the pie
  pie.hitcall <- if (piechart.actives) "active" else "inactive"
  class.hitc.dist[hitcall == pie.hitcall, total_pct := sum(pct), by = assay]
  
  pie.per.assay <- unique(class.hitc.dist[!is.na(total_pct), .(total_pct, assay)])
  pie.per.assay[, in_group := TRUE]
  
  # complement of each assay's in-group slice
  other.chunk <- pie.per.assay %>% 
    arrange(assay) %>%
    mutate(total_pct = 100-total_pct, 
           in_group = FALSE)
  
  # label positions come from position_stack() in geom_text below, so no
  # manual label-position column is computed here
  pie.data <- rbind(pie.per.assay, other.chunk) %>% 
    arrange(assay, desc(in_group))
  
  # % OF ACTIVES or INACTIVES AS A PIECHART 
  key.labels <- if (piechart.actives) {
    c("Actives not represented by chemotypes", "Actives represented by chemotypes")
  } else {
    c("Inactives not represented by chemotypes", "Inactives represented by chemotypes")
  }
  piechart <- ggplot(pie.data, 
                     aes(x="", y=total_pct, fill = in_group)) +
    geom_bar(stat="identity", width = 1, color = "black") +
    coord_polar("y", start=0) +
    ylim(0, 100) +
    scale_fill_discrete(name = "", 
                        labels = key.labels) +
    facet_wrap(~ assay, nrow =  1) +
    geom_text(aes(x = 1.25, label = ifelse(in_group, paste0(round(total_pct, digits = 1), "%"), "")),
              color = "black", size = 13/.pt, 
              position = position_stack(vjust = 0.5)) +
    theme_void() + 
    theme(legend.position = "bottom",
          legend.text = element_text(size = 12), 
          text = element_text(size = 14), 
          plot.margin = unit(c(1,0.5,0,0.5), "lines")) 
  
  # top row: pie charts and Venn diagram, separated by a narrow empty spacer
  aplusb <- plot_grid(piechart, NULL, venn_obj, rel_heights = c(1,1,1), 
                      rel_widths = c(0.7, 0.05, 0.3),
                      nrow = 1, 
                      labels = c("A", "B"), 
                      label_x = 0, label_y = 0.8, label_size = 20)
  
  # bottom part: bar chart stacked above the dot plot
  cplusd <- plot_grid(barchart, dotplot, rel_heights = c(1.4, 1), 
                      rel_widths = c(1, 0.5), 
                      nrow = 2, 
                      labels = c("C", "D"), 
                      label_x = 0, label_y = 1, label_size = 20)
  
  plot_grid(aplusb, cplusd,
            rel_heights = c(0.2, 0.8), rel_widths = c(1,1), nrow = 2)
}

bond:C(~Z)~C~Q_a-halocarbonyl (enriched for DIO1-3)

ahalo.res <- make_plots("bond:C(~Z)~C~Q_a-halocarbonyl")
ahalo.res

chain:alkeneLinear_[mono-ene|diene]_* (enriched for DIO1-3)

# rows 3-6 of the chemotypes flagged in exactly three targets
# NOTE(review): positional selection -- depends on the row order of M.active;
# confirm it still picks the chemotypes named in the heading
alk.lin.group <- rownames(M.active)[rs.active == 3][3:6]
alk.lin.res <- make_plots(alk.lin.group)
alk.lin.res

bond:CX_halide_alkyl-F_perfluoro_octyl (enriched for DIO1/2/IYD)

halide.res <- make_plots("bond:CX_halide_alkyl-F_perfluoro_octyl")
halide.res

chain:aromaticAlkene_Ph-C2_acyclic_generic (enriched for DIO2/3/IYD)

arom.alk.res <- make_plots("chain:aromaticAlkene_Ph-C2_acyclic_generic")
arom.alk.res

chain:aromaticAlkane_Ph-C6 (enriched for DIO1/2/IYD)

arom.alk.c6 <- make_plots("chain:aromaticAlkane_Ph-C6")
arom.alk.c6

chain:alkene[Branch|Linear]mono-ene* (enriched for DIO2/3)

# rows 4-6 of the chemotypes flagged in exactly two targets
# NOTE(review): positional selection -- confirm against the heading
mono.ene.group <- rownames(M.active)[rs.active == 2][4:6]
mono.ene.res <- make_plots(mono.ene.group)
mono.ene.res

chain:aromaticAlkene_Ph-C2_(styrene)* (enriched for DIO2/3)

# rows 10-11 of the chemotypes flagged in exactly two targets
# NOTE(review): positional selection -- confirm against the heading
ring.ph.c2 <- rownames(M.active)[rs.active == 2][10:11]
ring.phc2.res <- make_plots(ring.ph.c2)
ring.phc2.res

bond:quatN_alkyl_acyclic (enriched for DIO1/IYD)

quatN.res <- make_plots("bond:quatN_alkyl_acyclic")
quatN.res

bond:CC(=O)C_ketone_alkene_cyclic_3-en-1-one (enriched for DIO1/3)

ketone.alk.res <- make_plots("bond:CC(=O)C_ketone_alkene_cyclic_3-en-1-one")
ketone.alk.res

chain:aromaticAlkane_[Ar|Ph]-C(1)*-[Ar|Ph] (enriched for DIO2/IYD)

# rows 7-8 of the chemotypes flagged in exactly two targets
# NOTE(review): positional selection -- confirm against the heading
arom.alk.xcx.group <- rownames(M.active)[rs.active == 2][7:8]
arom.alk.XCX.res <- make_plots(arom.alk.xcx.group)
arom.alk.XCX.res

chain:alkaneLinear_hexadecyl_C16 (enriched for DIO2/IYD)

longc16.res <- make_plots("chain:alkaneLinear_hexadecyl_C16")
longc16.res

Enriched just for DIO1

bond:CC(=O)C_ketone_aromatic_aliphatic

ketone.dio1.res <- make_plots("bond:CC(=O)C_ketone_aromatic_aliphatic")
ketone.dio1.res

bond:[CO(C|H)][ether|alcohol][alkenyl|alkene]_*

# rows 2-4 of the chemotypes flagged for DIO1 only
# NOTE(review): positional selection -- confirm against the heading
alcoh.ethers <- rownames(M.active)[with(M.active, DIO1 == 1 & DIO2 == 0 & DIO3 == 0 & IYD == 0)][2:4]

alcohol.res <- make_plots(alcoh.ethers)
alcohol.res

bond:[CS|PS|SN]_[sulfide|generic]

# rows 5, 7 and 8 of the chemotypes flagged for DIO1 only
# NOTE(review): positional selection -- confirm against the heading
bonds.w.sulfur <- rownames(M.active)[with(M.active, DIO1 == 1 & DIO2 == 0 & DIO3 == 0 & IYD == 0)][c(5, 7, 8)]

sulfur.res <- make_plots(bonds.w.sulfur)
sulfur.res

ring:hetero_[6_6]_O_benzopyrone_(1_4-) / ring:hetero_[6_6]_O_benzopyran

# both benzopyran-type ring chemotypes are plotted together
ring.dio1.res <- make_plots(c(
  "ring:hetero_[6_6]_O_benzopyrone_(1_4-)",
  "ring:hetero_[6_6]_O_benzopyran"
))
ring.dio1.res

bond:P=O_phosphate_thioate

thioate.res <- make_plots("bond:P=O_phosphate_thioate")
thioate.res

DIO2

chain:alkeneLinear_diene_1_4-diene

chain.dio2.res <- make_plots("chain:alkeneLinear_diene_1_4-diene")
chain.dio2.res

DIO3

Carbonyls

# chemotypes grouped under the "Carbonyls" heading
carbonyls <- c(
  "bond:C=O_carbonyl_ab-unsaturated_aliphatic_(michael_acceptors)",
  "bond:CC(=O)C_ketone_alkene_generic",
  "chain:alkeneCyclic_ethene_C_(connect_noZ)"
)

carbonyl.res <- make_plots(carbonyls)
carbonyl.res

Cyclohexenes

# chemotypes grouped under the "Cyclohexenes" heading
cyclohexenes <- c(
  "bond:CC(=O)C_ketone_alkene_cyclic_(C6)",
  "chain:alkeneCyclic_diene_cyclohexene"
)

cyclohex.res <- make_plots(cyclohexenes)
cyclohex.res

bond:CN_amine_sec-NH_aromatic_aliphatic

amine.res <- make_plots("bond:CN_amine_sec-NH_aromatic_aliphatic")
amine.res

ring:hetero_[6]_N_triazine_(1_3_5-)

triazine.res <- make_plots("ring:hetero_[6]_N_triazine_(1_3_5-)")
triazine.res

bond:CS_sulfide_di-

disulfur.res <- make_plots("bond:CS_sulfide_di-")
disulfur.res

IYD

Halides

# rows 4, 5, 8 and 9 of the chemotypes flagged for IYD only
# NOTE(review): positional selection -- depends on the row order of M.active;
# confirm it still picks the halide chemotypes
halides <- rownames(M.active)[with(M.active, DIO1 == 0 & DIO2 == 0 & DIO3 == 0 & IYD == 1)][c(4, 5, 8, 9)]
halide.iyd.res <- make_plots(halides)
halide.iyd.res

bond:CX_halide_alkyl-Cl_dichloro_(1_1-)

dichloro.res <- make_plots("bond:CX_halide_alkyl-Cl_dichloro_(1_1-)")
dichloro.res

bond:CX_halide_alkyl-F_perfluoro_hexyl

perfluoro.res <- make_plots("bond:CX_halide_alkyl-F_perfluoro_hexyl")
perfluoro.res

bond:COH_alcohol_aromatic_[phenol]*

# generic aromatic alcohol and its phenol sub-type, plotted together
alcoh.arom.res <- make_plots(c(
  "bond:COH_alcohol_aromatic",
  "bond:COH_alcohol_aromatic_phenol"
))
alcoh.arom.res

bond:N=N_azo_[aromatic|generic]

# aromatic and generic azo chemotypes, plotted together
azo.res <- make_plots(c(
  "bond:N=N_azo_aromatic",
  "bond:N=N_azo_generic"
))
azo.res

bond:QQ(Q~O_S)_sulfur_oxide

sulf.oxide.res <- make_plots("bond:QQ(Q~O_S)_sulfur_oxide")
sulf.oxide.res

bond:S(=O)_sulfon^

# rows 13-15 of the chemotypes flagged for IYD only
# NOTE(review): positional selection -- depends on the row order of M.active;
# confirm it still picks the chemotypes named in the heading
sulfons <- rownames(M.active)[with(M.active, DIO1 == 0 & DIO2 == 0 & DIO3 == 0 & IYD == 1)][13:15]

sulfon.res <- make_plots(sulfons)
sulfon.res

sulfonic Esters

# rows 16-18 of the chemotypes flagged for IYD only
# NOTE(review): positional selection -- confirm against the heading
sulf.esters <- rownames(M.active)[with(M.active, DIO1 == 0 & DIO2 == 0 & DIO3 == 0 & IYD == 1)][16:18]

sulf.est.res <- make_plots(sulf.esters)
sulf.est.res

ring:fused_[6_6]_naphthalene

db.ring <- make_plots("ring:fused_[6_6]_naphthalene")
db.ring

bond:quatN_generic

quat.res <- make_plots("bond:quatN_generic")
quat.res

bond:C=N_imine_N(connect_noZ)

imine.res <- make_plots("bond:C=N_imine_N(connect_noZ)")
imine.res

Inactives

bond:COH_alcohol_pri-alkyl

# pie chart summarises inactives instead of actives
dio1.inactive <- make_plots("bond:COH_alcohol_pri-alkyl", piechart.actives = FALSE)
dio1.inactive

how many of these chems have the enriched chemotypes for actives?

# chemotypes enriched for actives, listed for the question in the heading
# NOTE(review): this vector is not referenced by any later code in this
# document
enriched.active.ct <- c(
  "chain:alkaneLinear_decyl_C10",
  "chain:alkaneLinear_dodedyl_C12",
  "chain:alkaneLinear_octyl_C8",
  "chain:alkaneLinear_tetradecyl_C14",
  "chain:alkeneBranch_mono-ene_2-butene",
  "chain:alkeneLinear_mono-ene_allyl",
  "chain:alkeneLinear_mono-ene_ethylene_generic"
)

chain:oxy-alkaneLinear_ethyleneOxide_EO1

# pie chart summarises inactives instead of actives
dio3.inactive <- make_plots("chain:oxy-alkaneLinear_ethyleneOxide_EO1",
                            piechart.actives = FALSE)
## Warning in min(sel.m$acc): no non-missing arguments to min; returning Inf
## Warning in max(sel.m$acc): no non-missing arguments to max; returning -Inf
## Warning: No shared levels found between `names(values)` of the manual scale and the
## data's shape values.
dio3.inactive

# record the R version and package versions used for this run
sessionInfo()

## R version 4.4.1 (2024-06-14 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 22631)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8 
## [2] LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] readxl_1.4.3       openxlsx_4.2.6.1   ggpubr_0.6.0       scales_1.3.0      
##  [5] colorspace_2.1-1   RColorBrewer_1.1-3 viridis_0.6.5      viridisLite_0.4.2 
##  [9] cowplot_1.1.3      ggvenn_0.1.10      ggrepel_0.9.5      ggplot2_3.5.1     
## [13] tidyr_1.3.1        dplyr_1.1.4        data.table_1.16.2  knitr_1.48        
## 
## loaded via a namespace (and not attached):
##  [1] sass_0.4.9        utf8_1.2.4        generics_0.1.3    rstatix_0.7.2    
##  [5] stringi_1.8.4     digest_0.6.36     magrittr_2.0.3    evaluate_1.0.0   
##  [9] fastmap_1.2.0     cellranger_1.1.0  jsonlite_1.8.8    zip_2.3.1        
## [13] backports_1.5.0   gridExtra_2.3     purrr_1.0.2       fansi_1.0.6      
## [17] textshaping_0.4.0 jquerylib_0.1.4   abind_1.4-5       cli_3.6.3        
## [21] rlang_1.1.4       munsell_0.5.1     withr_3.0.2       cachem_1.1.0     
## [25] yaml_2.3.10       tools_4.4.1       ggsignif_0.6.4    broom_1.0.6      
## [29] vctrs_0.6.5       R6_2.5.1          lifecycle_1.0.4   car_3.1-2        
## [33] ragg_1.3.2        pkgconfig_2.0.3   pillar_1.9.0      bslib_0.8.0      
## [37] gtable_0.3.6      glue_1.7.0        Rcpp_1.0.13-1     systemfonts_1.1.0
## [41] highr_0.11        xfun_0.46         tibble_3.2.1      tidyselect_1.2.1 
## [45] rstudioapi_0.16.0 farver_2.1.2      htmltools_0.5.8.1 labeling_0.4.3   
## [49] rmarkdown_2.28    carData_3.0-5     compiler_4.4.1